home *** CD-ROM | disk | FTP | other *** search
- /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- *
- * The contents of this file are subject to the Netscape Public License
- * Version 1.0 (the "NPL"); you may not use this file except in
- * compliance with the NPL. You may obtain a copy of the NPL at
- * http://www.mozilla.org/NPL/
- *
- * Software distributed under the NPL is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
- * for the specific language governing rights and limitations under the
- * NPL.
- *
- * The Initial Developer of this code under the NPL is Netscape
- * Communications Corporation. Portions created by Netscape are
- * Copyright (C) 1998 Netscape Communications Corporation. All Rights
- * Reserved.
- */
- /* is2euckr.c */
-
- #include "intlpriv.h"
-
-
- extern int MK_OUT_OF_MEMORY;
-
-
- /* net_iso2euckr(obj, isobuf, isobufsz, uncvtbuf)
- * Args:
- * isobuf: Ptr to a buf of iso-2022-kr chars
- * isobufsz: Size in bytes of isobuf
- * jismode: Ptr to encoding mode, use as arg for next call to
- * mz_iso2euckr() for rest of current 2022-kr data. First call should
- * initialize mode to ASCII (0).
- * uncvtbuf: If entire buffer was converted, uncvtbuf[0] will be nul,
- * else this points to iso-2022-kr chars that were NOT converted
- * and mz_iso2euckr() with additional iso-2022-kr chars appended.
- * Return:
- * Returns NULL on failure, otherwise it returns a pointer to a buffer of
- * converted EUC-KR characters. Caller must XP_FREE() this memory.
- *
- * Description:
- *
- * Allocate destination buffer (for EUC-KR).
- *
- * Set mode state based upon ESC sequence and SO/SI.
- *
- * If mode is KSC 5601, set 8th bits of next 2 bytes.
- *
- * If any other mode, then assume ASCII and strip the 8th bit.
- *
- * If either 2022-kr buffer does not contain complete char or EUC-KR buffer
- * is full, then return unconverted 2022-kr to caller. Caller should
- * append more data and recall mz_iso2euckr.
- */
-
-
- MODULE_PRIVATE unsigned char *
- mz_iso2euckr( CCCDataObject obj,
- const unsigned char *isobuf, /* 2022-kr buffer for conversion */
- int32 isobufsz) /* 2022-kr buffer size in bytes */
- {
- unsigned char *tobuf = NULL;
- int32 tobufsz;
- unsigned char *tobufp, *isop; /* current byte in bufs */
- unsigned char *tobufep, *isoep; /* end of buffers */
- int32 uncvtlen;
- unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);
-
- #define euckrbufsz tobufsz
- #define euckrbuf tobuf
- #define euckrp tobufp
- #define euckrep tobufep
- /* Allocate a dest buffer: */
- /* 2022-kr is usually longer than EUC-KR because of ESC seq.
- *
- * In the worst case (all ASCII), converted EUC-KR will be the same
- * length as the original 2022-kr + 1 for nul byte
- */
- uncvtlen = strlen((char *)uncvtbuf);
- tobufsz = isobufsz + uncvtlen + 1;
-
- if (!tobufsz) {
- return NULL;
- }
-
- if ((tobuf = (unsigned char *)XP_ALLOC(tobufsz)) == (unsigned char *)NULL) {
- INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
- return(NULL);
- }
- /* Initialize pointers, etc. */
- isop = (unsigned char *)isobuf;
- isoep = isop + isobufsz - 1;
-
- #define uncvtp tobufp /* use tobufp as temp */
- /* If prev. unconverted chars, append unconverted
- * chars w/new chars and try to process.
- */
- if (uncvtbuf[0] != '\0') {
- uncvtp = uncvtbuf + uncvtlen;
- while (uncvtp < (uncvtbuf + UNCVTBUF_SIZE) &&
- isop <= isoep)
- *uncvtp++ = *isop++;
- *uncvtp = '\0'; /* nul terminate */
- isop = uncvtbuf; /* process unconverted first */
- isoep = uncvtp - 1;
- }
- #undef uncvtp
-
- tobufp = tobuf;
- tobufep = tobufp + tobufsz - 2; /* save space for terminating null */
-
- WHILELOOP:
- INTL_SetCCCJismode(obj, KSC_5601_87); /* jliu doesn't want to change Tony's code too much*/
- /* While 2022-kr data && space in EUC-KR buf. */
- while ((tobufp <= tobufep) && (isop <= isoep)) {
-
- if( isop[0] == ESC && isoep - isop > 3 && ( isop[1] == '$' && isop[2] == ')'
- && isop[3] == 'C' ) ){
- /* eat that ESC seq. */
- isop += 4;
- } else if (*isop == SO) {
- /* obj->jismode |= SHIFT_OUT; */
- INTL_SetCCCJismode(obj, INTL_GetCCCJismode(obj) | SHIFT_OUT);
- isop++;
- } else if (*isop == SI) {
- INTL_SetCCCJismode(obj, INTL_GetCCCJismode(obj) & (~SHIFT_OUT));
- isop++;
- } else if (INTL_GetCCCJismode(obj) == (KSC_5601_87 | SHIFT_OUT)) {
- if(*isop == 0x20) /* jliu */
- {
- *euckrp++ = *isop++ ;
- }
- else
- {
- if ((isop+1) > isoep) /* Incomplete 2Byte char in JIS buf? */
- break;
-
- *euckrp++ = *isop++ | 0x80;
- *euckrp++ = *isop++ | 0x80;
- }
- } else if ((0xA1 <= *isop) && (*isop <= 0xFE)) {
- /* Somehow we hit EUC_KR data, let it through */
- if ((isop+1) > isoep) /* Incomplete 2Byte char in JIS buf? */
- break;
- *euckrp++ = *isop++ ;
- *euckrp++ = *isop++ ;
- } else {
- /* Unknown type: no conversion */
- *euckrp++ = *isop++ & 0x7f;
- }
- }
-
- if (uncvtbuf[0] != '\0') {
- /* Just processed unconverted chars:
- * isop pts to 1st unprocessed char in
- * isobuf. Some may have been processed
- * while processing unconverted chars,
- * so set up ptrs not to process them
- * twice.
- */
- /* If nothing was converted, this can
- * only happen if there was not
- * enough 2022-kr data. Stop and get
- * more data.
- */
- if (isop == uncvtbuf) { /* Nothing converted */
- *tobufp = '\0';
- return(NULL);
- }
- isoep = (unsigned char *)isobuf + isobufsz - 1 ;
- isop = (unsigned char *)isobuf + (isop - uncvtbuf - uncvtlen);
- uncvtbuf[0] = '\0'; /* No more uncoverted chars. */
- goto WHILELOOP; /* Process new data */
- }
-
- *tobufp = '\0'; /* null terminate dest. data */
- INTL_SetCCCLen(obj, tobufp - tobuf); /* length not counting null */
-
- if (isop <= isoep) { /* unconverted 2022-kr? */
- tobufp = uncvtbuf; /* reuse the tobufp as a TEMP */
- while (isop <= isoep)
- *tobufp++ = *isop++;
- *tobufp = '\0'; /* null terminate */
- }
- return(tobuf);
- }
-
-
-