home *** CD-ROM | disk | FTP | other *** search
- To: vim-dev@vim.org
- Subject: Patch 7.2.312
- Fcc: outbox
- From: Bram Moolenaar <Bram@moolenaar.net>
- Mime-Version: 1.0
- Content-Type: text/plain; charset=UTF-8
- Content-Transfer-Encoding: 8bit
- ------------
-
- Patch 7.2.312
- Problem: iconv() returns an invalid character sequence when conversion
- fails. It should return an empty string. (Yongwei Wu)
- Solution: Be more strict about invalid characters in the input.
- Files: src/mbyte.c
-
-
- *** ../vim-7.2.311/src/mbyte.c 2009-06-16 15:23:07.000000000 +0200
- --- src/mbyte.c 2009-11-25 16:10:44.000000000 +0100
- ***************
- *** 133,154 ****
- static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
- static int dbcs_ptr2char __ARGS((char_u *p));
-
- ! /* Lookup table to quickly get the length in bytes of a UTF-8 character from
- ! * the first byte of a UTF-8 string. Bytes which are illegal when used as the
- ! * first byte have a one, because these will be used separately. */
- static char utf8len_tab[256] =
- {
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
- ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
- };
-
- /*
- * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks
- * in the "xim.log" file.
- */
- --- 133,172 ----
- static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
- static int dbcs_ptr2char __ARGS((char_u *p));
-
- ! /*
- ! * Lookup table to quickly get the length in bytes of a UTF-8 character from
- ! * the first byte of a UTF-8 string.
- ! * Bytes which are illegal when used as the first byte have a 1.
- ! * The NUL byte has length 1.
- ! */
- static char utf8len_tab[256] =
- {
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
- };
-
- /*
- + * Like utf8len_tab above, but using a zero for illegal lead bytes.
- + */
- + static char utf8len_tab_zero[256] =
- + {
- + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0,
- + };
- +
- + /*
- * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks
- * in the "xim.log" file.
- */
- ***************
- *** 1352,1358 ****
- if (size > 0 && *p >= 0x80)
- {
- if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
- ! return 1;
- c = utf_ptr2char(p);
- /* An illegal byte is displayed as <xx>. */
- if (utf_ptr2len(p) == 1 || c == NUL)
- --- 1370,1376 ----
- if (size > 0 && *p >= 0x80)
- {
- if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
- ! return 1; /* truncated */
- c = utf_ptr2char(p);
- /* An illegal byte is displayed as <xx>. */
- if (utf_ptr2len(p) == 1 || c == NUL)
- ***************
- *** 1473,1479 ****
- if (p[0] < 0x80) /* be quick for ASCII */
- return p[0];
-
- ! len = utf8len_tab[p[0]];
- if (len > 1 && (p[1] & 0xc0) == 0x80)
- {
- if (len == 2)
- --- 1491,1497 ----
- if (p[0] < 0x80) /* be quick for ASCII */
- return p[0];
-
- ! len = utf8len_tab_zero[p[0]];
- if (len > 1 && (p[1] & 0xc0) == 0x80)
- {
- if (len == 2)
- ***************
- *** 1723,1728 ****
- --- 1741,1747 ----
- /*
- * Return length of UTF-8 character, obtained from the first byte.
- * "b" must be between 0 and 255!
- + * Returns 1 for an invalid first byte value.
- */
- int
- utf_byte2len(b)
- ***************
- *** 1737,1742 ****
- --- 1756,1762 ----
- * Returns 1 for "".
- * Returns 1 for an illegal byte sequence (also in incomplete byte seq.).
- * Returns number > "size" for an incomplete byte sequence.
- + * Never returns zero.
- */
- int
- utf_ptr2len_len(p, size)
- ***************
- *** 1747,1757 ****
- int i;
- int m;
-
- ! if (*p == NUL)
- ! return 1;
- ! m = len = utf8len_tab[*p];
- if (len > size)
- m = size; /* incomplete byte sequence. */
- for (i = 1; i < m; ++i)
- if ((p[i] & 0xc0) != 0x80)
- return 1;
- --- 1767,1779 ----
- int i;
- int m;
-
- ! len = utf8len_tab[*p];
- ! if (len == 1)
- ! return 1; /* NUL, ascii or illegal lead byte */
- if (len > size)
- m = size; /* incomplete byte sequence. */
- + else
- + m = len;
- for (i = 1; i < m; ++i)
- if ((p[i] & 0xc0) != 0x80)
- return 1;
- ***************
- *** 2505,2510 ****
- --- 2527,2533 ----
- /*
- * mb_head_off() function pointer.
- * Return offset from "p" to the first byte of the character it points into.
- + * If "p" points to the NUL at the end of the string return 0.
- * Returns 0 when already at the first byte of a character.
- */
- int
- ***************
- *** 2524,2530 ****
-
- /* It can't be a trailing byte when not using DBCS, at the start of the
- * string or the previous byte can't start a double-byte. */
- ! if (p <= base || MB_BYTE2LEN(p[-1]) == 1)
- return 0;
-
- /* This is slow: need to start at the base and go forward until the
- --- 2547,2553 ----
-
- /* It can't be a trailing byte when not using DBCS, at the start of the
- * string or the previous byte can't start a double-byte. */
- ! if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL)
- return 0;
-
- /* This is slow: need to start at the base and go forward until the
- ***************
- *** 2552,2558 ****
- * lead byte in the current cell. */
- if (p <= base
- || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
- ! || MB_BYTE2LEN(p[-1]) == 1)
- return 0;
-
- /* This is slow: need to start at the base and go forward until the
- --- 2575,2582 ----
- * lead byte in the current cell. */
- if (p <= base
- || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
- ! || MB_BYTE2LEN(p[-1]) == 1
- ! || *p == NUL)
- return 0;
-
- /* This is slow: need to start at the base and go forward until the
- ***************
- *** 2578,2583 ****
- --- 2602,2608 ----
- char_u *q;
- char_u *s;
- int c;
- + int len;
- #ifdef FEAT_ARABIC
- char_u *j;
- #endif
- ***************
- *** 2597,2604 ****
- --q;
- /* Check for illegal sequence. Do allow an illegal byte after where we
- * started. */
- ! if (utf8len_tab[*q] != (int)(s - q + 1)
- ! && utf8len_tab[*q] != (int)(p - q + 1))
- return 0;
-
- if (q <= base)
- --- 2622,2629 ----
- --q;
- /* Check for illegal sequence. Do allow an illegal byte after where we
- * started. */
- ! len = utf8len_tab[*q];
- ! if (len != (int)(s - q + 1) && len != (int)(p - q + 1))
- return 0;
-
- if (q <= base)
- ***************
- *** 2810,2818 ****
-
- while (end == NULL ? *p != NUL : p < end)
- {
- ! if ((*p & 0xc0) == 0x80)
- return FALSE; /* invalid lead byte */
- - l = utf8len_tab[*p];
- if (end != NULL && p + l > end)
- return FALSE; /* incomplete byte sequence */
- ++p;
- --- 2835,2843 ----
-
- while (end == NULL ? *p != NUL : p < end)
- {
- ! l = utf8len_tab_zero[*p];
- ! if (l == 0)
- return FALSE; /* invalid lead byte */
- if (end != NULL && p + l > end)
- return FALSE; /* incomplete byte sequence */
- ++p;
- ***************
- *** 6117,6128 ****
- d = retval;
- for (i = 0; i < len; ++i)
- {
- ! l = utf_ptr2len(ptr + i);
- if (l == 0)
- *d++ = NUL;
- else if (l == 1)
- {
- ! if (unconvlenp != NULL && utf8len_tab[ptr[i]] > len - i)
- {
- /* Incomplete sequence at the end. */
- *unconvlenp = len - i;
- --- 6142,6161 ----
- d = retval;
- for (i = 0; i < len; ++i)
- {
- ! l = utf_ptr2len_len(ptr + i, len - i);
- if (l == 0)
- *d++ = NUL;
- else if (l == 1)
- {
- ! int l_w = utf8len_tab_zero[ptr[i]];
- !
- ! if (l_w == 0)
- ! {
- ! /* Illegal utf-8 byte cannot be converted */
- ! vim_free(retval);
- ! return NULL;
- ! }
- ! if (unconvlenp != NULL && l_w > len - i)
- {
- /* Incomplete sequence at the end. */
- *unconvlenp = len - i;
- *** ../vim-7.2.311/src/version.c 2009-12-02 13:32:10.000000000 +0100
- --- src/version.c 2009-12-02 15:00:23.000000000 +0100
- ***************
- *** 683,684 ****
- --- 683,686 ----
- { /* Add new patch number below this line */
- + /**/
- + 312,
- /**/
-
- --
- hundred-and-one symptoms of being an internet addict:
- 6. You refuse to go to a vacation spot with no electricity and no phone lines.
-
- /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
- /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
- \\\ download, build and distribute -- http://www.A-A-P.org ///
- \\\ help me help AIDS victims -- http://ICCF-Holland.org ///
-