home *** CD-ROM | disk | FTP | other *** search
- To: vim-dev@vim.org
- Subject: Patch 6.1.229
- Fcc: outbox
- From: Bram Moolenaar <Bram@moolenaar.net>
- Mime-Version: 1.0
- Content-Type: text/plain; charset=ISO-8859-1
- Content-Transfer-Encoding: 8bit
- ------------
-
- Patch 6.1.229
- Problem: Win32: Conversion to/from often used codepages requires the iconv
- library, which is not always available.
- Solution: Use standard MS-Windows functions for the conversion when
- possible. (mostly by Glenn Maynard)
- Also fixes missing declaration for patch 6.1.220.
- Files: src/fileio.c
-
-
- *** ../vim61.228/src/fileio.c Sun Oct 13 20:08:14 2002
- --- src/fileio.c Tue Oct 15 20:35:02 2002
- ***************
- *** 78,83 ****
- --- 78,88 ----
- # define FIO_UCS2 0x04 /* convert UCS-2 */
- # define FIO_UCS4 0x08 /* convert UCS-4 */
- # define FIO_UTF16 0x10 /* convert UTF-16 */
- + # ifdef WIN3264
- + # define FIO_CODEPAGE 0x20 /* convert MS-Windows codepage */
- + # define FIO_PUT_CP(x) (((x) & 0xffff) << 16) /* put codepage in top word */
- + # define FIO_GET_CP(x) (((x)>>16) & 0xffff) /* get codepage from top word */
- + # endif
- # define FIO_ENDIAN_L 0x80 /* little endian */
- # define FIO_ENCRYPTED 0x1000 /* encrypt written bytes */
- # define FIO_NOCONVERT 0x2000 /* skip encoding conversion */
- ***************
- *** 125,131 ****
- --- 130,140 ----
- static int get_fio_flags __ARGS((char_u *ptr));
- static char_u *check_for_bom __ARGS((char_u *p, long size, int *lenp, int flags));
- static int make_bom __ARGS((char_u *buf, char_u *name));
- + # ifdef WIN3264
- + static int get_win_fio_flags __ARGS((char_u *ptr));
- + # endif
- #endif
- + static int move_lines __ARGS((buf_T *frombuf, buf_T *tobuf));
-
- static linenr_T write_no_eol_lnum = 0; /* non-zero lnum when last line of
- next binary write should not have
- ***************
- *** 851,856 ****
- --- 861,874 ----
- else if (enc_utf8 || STRCMP(p_enc, "latin1") == 0)
- fio_flags = get_fio_flags(fenc);
-
- + # ifdef WIN3264
- + /*
- + * Conversion from an MS-Windows codepage to UTF-8 is handled here.
- + */
- + if (fio_flags == 0 && enc_utf8)
- + fio_flags = get_win_fio_flags(fenc);
- + # endif
- +
- # ifdef USE_ICONV
- /*
- * Try using iconv() if we can't convert internally.
- ***************
- *** 1007,1012 ****
- --- 1025,1034 ----
- size = (size * 2 / 3) & ~3;
- else if (fio_flags == FIO_UCSBOM)
- size = size / ICONV_MULT; /* worst case */
- + # ifdef WIN3264
- + else if (fio_flags & FIO_CODEPAGE)
- + size = size / ICONV_MULT; /* also worst case */
- + # endif
- #endif
-
- #ifdef FEAT_MBYTE
- ***************
- *** 1213,1218 ****
- --- 1235,1301 ----
- }
- # endif
-
- + # ifdef WIN3264
- + if (fio_flags & FIO_CODEPAGE)
- + {
- + /*
- + * Conversion from an MS-Windows codepage to UTF-8, using
- + * standard MS-Windows functions.
- + */
- + char_u *ucsp;
- + size_t from_size;
- + int needed;
- + char_u *p;
- + int u8c;
- +
- + /*
- + * We can't tell if the last byte of an MBCS string is valid
- + * and MultiByteToWideChar() returns zero if it isn't.
- + * Try the whole string, and if that fails, bump the last byte
- + * into conv_rest and try again.
- + */
- + from_size = size;
- + needed = MultiByteToWideChar(FIO_GET_CP(fio_flags),
- + MB_ERR_INVALID_CHARS, (LPCSTR)ptr, from_size,
- + NULL, 0);
- + if (needed == 0)
- + {
- + conv_rest[0] = ptr[from_size - 1];
- + conv_restlen = 1;
- + --from_size;
- + needed = MultiByteToWideChar(FIO_GET_CP(fio_flags),
- + MB_ERR_INVALID_CHARS, (LPCSTR)ptr, from_size,
- + NULL, 0);
- + }
- +
- + /* If there really is a conversion error, try using another
- + * conversion. */
- + if (needed == 0)
- + goto rewind_retry;
- +
- + /* Put the result of conversion to UCS-2 at the end of the
- + * buffer, then convert from UCS-2 to UTF-8 into the start of
- + * the buffer. If there is not enough space just fail, there
- + * is probably something wrong. */
- + ucsp = ptr + real_size - (needed * sizeof(WCHAR));
- + if (ucsp < ptr + size)
- + goto rewind_retry;
- + needed = MultiByteToWideChar(FIO_GET_CP(fio_flags),
- + MB_ERR_INVALID_CHARS, (LPCSTR)ptr,
- + from_size, (LPWSTR)ucsp, needed);
- +
- + /* Now go from UCS-2 to UTF-8. */
- + p = ptr;
- + for (; needed > 0; --needed)
- + {
- + u8c = *ucsp++;
- + u8c += (*ucsp++ << 8);
- + p += utf_char2bytes(u8c, p);
- + }
- + size = p - ptr;
- + }
- + else
- + # endif
- if (fio_flags != 0)
- {
- int u8c;
- ***************
- *** 3146,3151 ****
- --- 3229,3248 ----
- }
- }
-
- + # ifdef WIN3264
- + if (converted && wb_flags == 0 && get_win_fio_flags(fenc))
- + {
- + wb_flags = get_win_fio_flags(fenc);
- +
- + /* Convert UTF-8 -> UCS-2 and UCS-2 -> DBCS. Worst-case * 4: */
- + write_info.bw_conv_buflen = bufsize * 4;
- + write_info.bw_conv_buf
- + = lalloc((long_u)write_info.bw_conv_buflen, TRUE);
- + if (write_info.bw_conv_buf == NULL)
- + end = 0;
- + }
- + # endif
- +
- # if defined(FEAT_EVAL) || defined(USE_ICONV)
- if (converted && wb_flags == 0)
- {
- ***************
- *** 4126,4131 ****
- --- 4223,4291 ----
- }
- }
-
- + # ifdef WIN3264
- + else if (flags & FIO_CODEPAGE)
- + {
- + /*
- + * Convert UTF-8 to UCS-2 and then to MS-Windows codepage.
- + */
- + char_u *from;
- + size_t fromlen;
- + char_u *to;
- + int u8c;
- + BOOL bad = FALSE;
- +
- + if (ip->bw_restlen > 0)
- + {
- + /* Need to concatenate the remainder of the previous call and
- + * the bytes of the current call. Use the end of the
- + * conversion buffer for this. */
- + fromlen = len + ip->bw_restlen;
- + from = ip->bw_conv_buf + ip->bw_conv_buflen - fromlen;
- + mch_memmove(from, ip->bw_rest, (size_t)ip->bw_restlen);
- + mch_memmove(from + ip->bw_restlen, buf, (size_t)len);
- + }
- + else
- + {
- + from = buf;
- + fromlen = len;
- + }
- +
- + /* Convert from UTF-8 to UCS-2, to the start of the buffer.
- + * The buffer has been allocated to be big enough. */
- + to = ip->bw_conv_buf;
- + while (fromlen > 0)
- + {
- + n = utf_ptr2len_check_len(from, fromlen);
- + if (n > (int)fromlen)
- + break;
- + u8c = utf_ptr2char(from);
- + *to++ = (u8c & 0xff);
- + *to++ = (u8c >> 8);
- + fromlen -= n;
- + from += n;
- + }
- +
- + /* copy remainder to ip->bw_rest[] to be used for the next call. */
- + mch_memmove(ip->bw_rest, from, fromlen);
- + ip->bw_restlen = fromlen;
- +
- + /* Convert from UCS-2 to the codepage, using the remainder of the
- + * conversion buffer. If the conversion uses the default
- + * character "0", the data doesn't fit in this encoding, so fail. */
- + fromlen = to - ip->bw_conv_buf;
- + len = WideCharToMultiByte(FIO_GET_CP(flags), 0,
- + (LPCWSTR)ip->bw_conv_buf, (int)fromlen / sizeof(WCHAR),
- + (LPSTR)to, ip->bw_conv_buflen - fromlen, 0, &bad);
- + if (bad)
- + {
- + ip->bw_conv_error = TRUE;
- + return FAIL;
- + }
- + buf = to;
- + }
- + # endif
- +
- # ifdef USE_ICONV
- if (ip->bw_iconv_fd != (iconv_t)-1)
- {
- ***************
- *** 4364,4369 ****
- --- 4524,4544 ----
- /* must be ENC_DBCS, requires iconv() */
- return 0;
- }
- +
- + #ifdef WIN3264
- + /*
- + * Check "ptr" for an MS-Windows codepage name of the form "cp<digits>".
- + * Return FIO_CODEPAGE with the codepage in the top word, or 0 if no match.
- + */
- + static int
- + get_win_fio_flags(ptr)
- + char_u *ptr;
- + {
- + if (ptr[0] == 'c' && ptr[1] == 'p' && isdigit(ptr[2]))
- + return FIO_PUT_CP(atoi((char *)ptr + 2)) | FIO_CODEPAGE;
- + return 0;
- + }
- + #endif
-
- /*
- * Check for a Unicode BOM (Byte Order Mark) at the start of p[size].
- *** ../vim61.228/src/version.c Tue Oct 15 21:05:16 2002
- --- src/version.c Tue Oct 15 21:09:55 2002
- ***************
- *** 608,609 ****
- --- 608,611 ----
- { /* Add new patch number below this line */
- + /**/
- + 229,
- /**/
-
- --
- hundred-and-one symptoms of being an internet addict:
- 235. You start naming your kids Pascal, COBOL, Algol and Fortran.
-
- /// Bram Moolenaar -- Bram@moolenaar.net -- http://www.moolenaar.net \\\
- /// Creator of Vim - Vi IMproved -- http://www.vim.org \\\
- \\\ Project leader for A-A-P -- http://www.a-a-p.org ///
- \\\ Lord Of The Rings helps Uganda - http://iccf-holland.org/lotr.html ///
-