home *** CD-ROM | disk | FTP | other *** search
- /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- *
- * The contents of this file are subject to the Netscape Public License
- * Version 1.0 (the "NPL"); you may not use this file except in
- * compliance with the NPL. You may obtain a copy of the NPL at
- * http://www.mozilla.org/NPL/
- *
- * Software distributed under the NPL is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
- * for the specific language governing rights and limitations under the
- * NPL.
- *
- * The Initial Developer of this code under the NPL is Netscape
- * Communications Corporation. Portions created by Netscape are
- * Copyright (C) 1998 Netscape Communications Corporation. All Rights
- * Reserved.
- */
- /* sjis2jis.c */
-
- #include "intlpriv.h"
- #ifdef XP_MAC
- #include "katakana.h"
- #endif
-
- extern int MK_OUT_OF_MEMORY;
-
-
/*
 * TwoByteSJIS2JIS(sjisp, jisp, offset) -- SJIS to JIS algorithm.
 *
 * Converts the two-byte Shift-JIS character at *sjisp into its two-byte
 * JIS form at *jisp.  Both pointer arguments must be modifiable lvalues
 * of type "pointer to unsigned char"; each one is advanced by exactly two
 * bytes.  The arguments are evaluated several times, so they must not
 * have side effects of their own.
 *
 *  sjisp:  points at the SJIS lead byte; the trail byte must be readable.
 *  jisp:   points at two writable output bytes.
 *  offset: value subtracted from the lead byte before doubling.  The
 *          callers in this file pass 0x70 for lead bytes below 0xA0 and
 *          0xB0 for lead bytes in 0xE0..0xEF.
 *
 * Each SJIS lead byte covers two JIS rows.  A trail byte below 0x9F
 * selects the first (odd) row, so the doubled lead byte is decremented;
 * any other trail byte selects the second row.  SJIS trail bytes skip
 * 0x7F, hence the two different adjustments (0x1F / 0x20) in the first
 * row.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by ';' is a single statement and can be used safely in an unbraced
 * if/else.
 */
#define TwoByteSJIS2JIS(sjisp, jisp, offset)                                \
    do {                                                                    \
        *(jisp) = (*(sjisp)++ - (offset)) << 1; /* assign 1st byte */       \
        if (*(sjisp) < 0x9F) {                  /* check 2nd SJIS byte */   \
            *(jisp)++ -= 1;                     /* adjust 1st JIS byte */   \
            if (*(sjisp) > 0x7F)                                            \
                *(jisp)++ = *(sjisp)++ - 0x20;                              \
            else                                                            \
                *(jisp)++ = *(sjisp)++ - 0x1F;                              \
        } else {                                                            \
            (jisp)++;                           /* 1st byte already right */\
            *(jisp)++ = *(sjisp)++ - 0x7E;                                  \
        }                                                                   \
    } while (0)
-
- /* net_sjis2jis(obj, sjisbuf, sjisbufsz)
- * Args:
- * sjisbuf: Ptr to a buf of SJIS chars
- * sjisbufsz: Size in bytes of sjisbuf
- * jismode: Ptr to encoding mode, use as arg for next call to
- * mz_sjis2jis() for rest of current SJIS data. First call should
- * initialize mode to ASCII (0).
- * uncvtbuf: If entire buffer was converted, uncvtbuf[0] will be null,
- * else this points to SJIS chars that were NOT converted
- * and mz_sjis2jis() with additional SJIS chars appended.
- * Return:
- * Returns NULL on failure, otherwise it returns a pointer to a buffer of
- * converted SJIS characters. Caller must XP_FREE() this memory.
- *
- * Description:
- * Allocate destination JIS buffer.
- *
- * If the SJIS to JIS conversion changes JIS encoding, output proper ESC
- * sequence.
- *
- * If byte in ASCII range, just copy it to JIS buffer.
- * If Half-width SJIS katakana (1 byte), convert to Half-width JIS katakana.
- * --- Now Half-width SJIS katakana is converted to 2-byte JIS katakana. ---
- * If 2-byte SJIS, convert to 2-byte JIS.
- * Otherwise assume user-defined SJIS, just copy 2 bytes.
- *
- * If either SJIS buffer does not contain complete SJIS char or JIS buffer
- * is full, then return unconverted SJIS to caller. Caller should
- * append more data and recall mz_sjis2jis.
- */
-
- MODULE_PRIVATE unsigned char *
- mz_sjis2jis( CCCDataObject obj,
- const unsigned char *sjisbuf, /* SJIS buf for conversion */
- int32 sjisbufsz) /* SJIS buf size in bytes */
- {
- unsigned char *tobuf = NULL;
- int32 tobufsz;
- register unsigned char *sjisp, *tobufp; /* current byte in bufs */
- register unsigned char *sjisep, *toep; /* end of buffers */
- int32 uncvtlen;
- unsigned char *uncvtbuf = INTL_GetCCCUncvtbuf(obj);
- #ifdef FEATURE_KATAKANA
- unsigned char outbuf[2]; /* for half-width kana */
- uint32 byteused; /* for half-width kana */
- #endif
-
- /* Allocate a JIS buffer: */
- /* JIS is longer than SJIS because of ESC seq. In the worst case
- * ( alternating Half-width Kana and Roman chars ), converted
- * JIS will be 4X the size of the original SJIS + 1 for nul byte.
- * Worst case: single half-width kana:
- * ESC ( I KANA ESC ( J
- */
- uncvtlen = strlen((char *)uncvtbuf);
- tobufsz = ((sjisbufsz + uncvtlen) << 2) + 8;
- if ((tobuf = (unsigned char *)XP_ALLOC(tobufsz)) == (unsigned char *)NULL) {
- INTL_SetCCCRetval(obj, MK_OUT_OF_MEMORY);
- return(NULL);
- }
- /* Initialize pointers, etc. */
- sjisp = (unsigned char *)sjisbuf;
- sjisep = sjisp + sjisbufsz - 1;
-
- #define uncvtp tobufp /* use tobufp as temp */
- /* If prev. unconverted chars, append unconverted
- * chars w/new chars and try to process.
- */
- if (uncvtbuf[0] != '\0') {
- uncvtp = uncvtbuf + uncvtlen;
- while (uncvtp < (uncvtbuf + UNCVTBUF_SIZE) &&
- sjisp <= sjisep)
- *uncvtp++ = *sjisp++;
- *uncvtp = '\0'; /* nul terminate */
- sjisp = uncvtbuf; /* process unconverted first */
- sjisep = uncvtp - 1;
- }
- #undef uncvtp
-
- tobufp = tobuf;
- toep = tobufp + tobufsz - 2; /* save space for terminating null */
-
- WHILELOOP:
- /* While SJIS data && space in JIS buf. */
- while ((sjisp <= sjisep) && (tobufp <= toep)) {
- if (*sjisp < 0x80) {
- /* ASCII/JIS-Roman */
- if (INTL_GetCCCJismode(obj) != JIS_Roman) {
- InsASCII_ESC(tobufp, obj);
- }
- *tobufp++ = *sjisp++;
-
- } else if (*sjisp < 0xA0) {
- /* 1st byte of 2-byte low SJIS. */
- if (sjisp+1 > sjisep) /* No 2nd byte in SJIS buffer? */
- break;
-
- if (INTL_GetCCCJismode(obj) != JIS_208_83) {
- Ins208_83_ESC(tobufp, obj);
- }
-
- TwoByteSJIS2JIS(sjisp, tobufp, 0x70);
-
- } else if (*sjisp==0xA0) {
- /* SJIS half-width space. */
- /* Just treat like Roman?? */
- if (INTL_GetCCCJismode(obj) != JIS_Roman) {
- InsASCII_ESC(tobufp, obj);
- }
- *tobufp++ = *sjisp++;
-
- } else if (*sjisp < 0xE0) {
- /* SJIS half-width katakana */
- #ifdef FEATURE_KATAKANA
- if (INTL_GetCCCJismode(obj) != JIS_208_83) {
- Ins208_83_ESC(tobufp, obj);
- }
- INTL_SjisHalf2FullKana(sjisp, (uint32)sjisep - (uint32)sjisp + 1, outbuf, &byteused);
- /* SJIS Katakana is 0x8340-0x8396 */
- *tobufp++ = ((outbuf[0] - 0x70) << 1) - 1; /* assign 1st byte */
- if (outbuf[1] > 0x7F)
- *tobufp++ = outbuf[1] - 0x20;
- else
- *tobufp++ = outbuf[1] - 0x1F;
- sjisp += byteused;
- #else
- if (INTL_GetCCCJismode(obj) != JIS_HalfKana) {
- InsHalfKana_ESC(tobufp, obj);
- }
- *tobufp++ = *sjisp & 0x7F;
- sjisp++;
- #endif
- } else if (*sjisp < 0xF0) {
- /* 1st byte of 2-byte high SJIS */
- if (sjisp+1 > sjisep) /* No 2nd byte in SJIS buffer? */
- break;
-
- if (INTL_GetCCCJismode(obj) != JIS_208_83) {
- Ins208_83_ESC(tobufp, obj);
- }
-
- TwoByteSJIS2JIS(sjisp, tobufp, 0xB0);
- } else {
- /* User Defined SJIS: copy bytes */
- if (sjisp+1 > sjisep) /* No 2nd byte in SJIS buf? */
- break;
-
- if (INTL_GetCCCJismode(obj) != JIS_208_83) {
- Ins208_83_ESC(tobufp, obj);
- }
-
- *tobufp++ = *sjisp++; /* Just copy 2 bytes. */
- *tobufp++ = *sjisp++;
- }
- }
-
- if (uncvtbuf[0] != '\0') {
- /* tobufp pts to 1st unprocessed char in
- * tobuf. Some may have been processed
- * while processing unconverted chars,
- * so set up ptrs not to process them
- * twice.
- */
- sjisp = (unsigned char *)sjisbuf + (sjisp - uncvtbuf - uncvtlen);
- /* save space for term. null */
- sjisep = (unsigned char *)sjisbuf + sjisbufsz - 1;
- uncvtbuf[0] = '\0'; /* No more uncoverted chars. */
- goto WHILELOOP; /* Process new data */
- }
-
- if (INTL_GetCCCJismode(obj) != JIS_Roman) {
- INTL_SetCCCJismode(obj, JIS_Roman);
- InsASCII_ESC(tobufp, obj);
- }
-
- *tobufp = '\0'; /* null terminate JIS data */
- INTL_SetCCCLen(obj, tobufp - tobuf); /* length not counting null */
-
- if (sjisp <= sjisep) { /* uncoverted SJIS? */
- tobufp = uncvtbuf; /* reuse the tobufp as a TEMP */
- while (sjisp <= sjisep)
- *tobufp++ = *sjisp++;
- *tobufp = '\0'; /* null terminate */
- }
- return(tobuf);
- }
-
-