home *** CD-ROM | disk | FTP | other *** search
- /*
- ********************************************************************************
- * *
- * Copyright: *
- * (C) Copyright International Business Machines Corporation, 1998 *
- * Licensed Material - Program-Property of IBM - All Rights Reserved. *
- * US Government Users Restricted Rights - Use, duplication, or disclosure *
- * restricted by GSA ADP Schedule Contract with IBM Corp. *
- * *
- ********************************************************************************
- *
- *
- *
- * ucnv_err.c
- * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
- *
- */
-
- #include "uhash.h"
- #include "ucmp8.h"
- #include "ucmp16.h"
- #include "ucnv_bld.h"
- #include "ucnv_err.h"
- #include "ucnv_cnv.h"
- #include "cmemory.h"
- #include "ucnv.h"
-
- #define VALUE_STRING_LENGTH 32
- /*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */
- #define CODEPOINT_STRING_LENGTH 7
- #define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
- #define UNICODE_U_CODEPOINT 0x0055
- #define UNICODE_X_CODEPOINT 0x0058
-
-
- #define ToOffset(a) a<=9?(0x0030+a):(0x0030+a+7)
-
- bool_t
- CONVERSION_U_SUCCESS (UErrorCode err)
- {
- if ((err == U_INVALID_CHAR_FOUND) || (err == U_ILLEGAL_CHAR_FOUND)) return FALSE;
- else return TRUE;
- }
-
- /*Takes a int32_t and fills in a UChar* string with that number "radix"-based
- * and padded with "pad" zeroes
- */
- static void itou (UChar * buffer, int32_t i, int32_t radix, int32_t pad)
- {
- int32_t length = 0;
- int32_t num = 0;
- int8_t digit;
- int32_t j;
- UChar temp;
-
- while (i > radix)
- {
- num = i / radix;
- digit = (int8_t) (i - num * radix);
- buffer[length++] = (UChar) (ToOffset (digit));
- i = num;
- }
-
- buffer[length] = (UChar) (ToOffset (i));
-
- while (length < pad) buffer[++length] = (UChar) 0x0030; /*zero padding */
- buffer[length--] = (UChar) 0x0000;
-
- /*Reverses the string */
- for (j = 0; j < (pad / 2); j++)
- {
- temp = buffer[length - j];
- buffer[length - j] = buffer[j];
- buffer[j] = temp;
- }
-
- return;
- }
-
- /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
- void UCNV_FROM_U_CALLBACK_STOP (UConverter * _this,
- char **target,
- const char *targetLimit,
- const UChar ** source,
- const UChar * sourceLimit,
- int32_t *offsets,
- bool_t flush,
- UErrorCode * err)
- {
- return;
- }
-
-
- /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
- void UCNV_TO_U_CALLBACK_STOP (UConverter * _this,
- UChar ** target,
- const UChar * targetLimit,
- const char **source,
- const char *sourceLimit,
- int32_t *offsets,
- bool_t flush,
- UErrorCode * err)
- {
- return;
- }
-
- void UCNV_FROM_U_CALLBACK_SKIP (UConverter * _this,
- char **target,
- const char *targetLimit,
- const UChar ** source,
- const UChar * sourceLimit,
- int32_t *offsets,
- bool_t flush,
- UErrorCode * err)
- {
- if (CONVERSION_U_SUCCESS (*err)) return;
- *err = U_ZERO_ERROR;
- }
-
- void UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
- char **target,
- const char *targetLimit,
- const UChar ** source,
- const UChar * sourceLimit,
- int32_t *offsets,
- bool_t flush,
- UErrorCode * err)
- {
- char togo[5];
- int32_t togoLen;
-
-
-
- if (CONVERSION_U_SUCCESS (*err)) return;
-
- /*In case we're dealing with a modal converter a la UCNV_EBCDIC_STATEFUL,
- we need to make sure that the emitting of the substitution charater in the right mode*/
- icu_memcpy(togo, _this->subChar, togoLen = _this->subCharLen);
- if (ucnv_getType(_this) == UCNV_EBCDIC_STATEFUL)
- {
- if ((_this->fromUnicodeStatus)&&(togoLen != 2))
- {
- togo[0] = UCNV_SI;
- togo[1] = _this->subChar[0];
- togo[2] = UCNV_SO;
- togoLen = 3;
- }
- else if (!(_this->fromUnicodeStatus)&&(togoLen != 1))
- {
- togo[0] = UCNV_SO;
- togo[1] = _this->subChar[0];
- togo[2] = _this->subChar[1];
- togo[3] = UCNV_SI;
- togoLen = 4;
- }
- }
-
- /*if we have enough space on the output buffer we just copy
- the subchar there and update the pointer */
- if ((targetLimit - *target) >= togoLen)
- {
- icu_memcpy (*target, togo, togoLen);
- *target += togoLen;
- *err = U_ZERO_ERROR;
- if (offsets)
- {
- int i=0;
- for (i=0;i<togoLen;i++) offsets[i]=0;
- offsets += togoLen;
- }
- }
- else
- {
- /*if we don't have enough space on the output buffer
- *we copy as much as we can to it, update that pointer.
- *copy the rest in the internal buffer, and increase the
- *length marker
- */
- icu_memcpy (*target, togo, (targetLimit - *target));
- if (offsets)
- {
- int i=0;
- for (i=0;i<(targetLimit - *target);i++) offsets[i]=0;
- offsets += (targetLimit - *target);
- }
- icu_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
- togo + (targetLimit - *target),
- togoLen - (targetLimit - *target));
- _this->charErrorBufferLength += togoLen - (targetLimit - *target);
- *target += (targetLimit - *target);
- *err = U_INDEX_OUTOFBOUNDS_ERROR;
- }
-
- return;
-
- }
-
- /*uses itou to get a unicode escape sequence of the offensive sequence,
- *uses a clean copy (resetted) of the converter, to convert that unicode
- *escape sequence to the target codepage (if conversion failure happens then
- *we revert to substituting with subchar)
- */
- void UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
- char **target,
- const char *targetLimit,
- const UChar ** source,
- const UChar * sourceLimit,
- int32_t *offsets,
- bool_t flush,
- UErrorCode * err)
- {
-
- UChar valueString[VALUE_STRING_LENGTH];
- int32_t valueStringLength = 0;
- const UChar *mySource = *source;
- UChar codepoint[CODEPOINT_STRING_LENGTH];
- int32_t i = 0;
- /*Makes a bitwise copy of the converter passwd in */
- UConverter myConverter = *_this;
- char myTarget[VALUE_STRING_LENGTH];
- char *myTargetAlias = myTarget;
- const UChar *myValueSource = NULL;
- UErrorCode err2 = U_ZERO_ERROR;
- uint32_t myFromUnicodeStatus = _this->fromUnicodeStatus;
-
-
- if (CONVERSION_U_SUCCESS (*err)) return;
-
- ucnv_reset (&myConverter);
- myConverter.fromUnicodeStatus = myFromUnicodeStatus;
-
- ucnv_setFromUCallBack (&myConverter,
- (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP,
- &err2);
- if (U_FAILURE (err2))
- {
- *err = err2;
- return;
- }
-
- codepoint[0] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
- codepoint[1] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
-
- while (i < _this->invalidUCharLength)
- {
- itou (codepoint + 2, _this->invalidUCharBuffer[i++], 16, 4);
- icu_memcpy (valueString + valueStringLength, codepoint, sizeof (UChar) * 6);
- valueStringLength += CODEPOINT_STRING_LENGTH - 1;
- }
-
- myValueSource = valueString;
-
- /*converts unicode escape sequence */
- ucnv_fromUnicode (&myConverter,
- &myTargetAlias,
- myTargetAlias + VALUE_STRING_LENGTH,
- &myValueSource,
- myValueSource + CODEPOINT_STRING_LENGTH - 1,
- NULL,
- TRUE,
- &err2);
-
- if (U_FAILURE (err2))
- {
- UCNV_FROM_U_CALLBACK_SUBSTITUTE (_this,
- target,
- targetLimit,
- source,
- sourceLimit,
- offsets,
- flush,
- err);
- return;
- }
-
-
-
- valueStringLength = myTargetAlias - myTarget;
-
- /*if we have enough space on the output buffer we just copy
- * the subchar there and update the pointer
- */
- if ((targetLimit - *target) >= valueStringLength)
- {
- icu_memcpy (*target, myTarget, valueStringLength);
- *target += valueStringLength;
- *err = U_ZERO_ERROR;
-
- if (offsets)
- {
- int i=0;
- for (i=0;i<valueStringLength;i++) offsets[i]=0;
- offsets += valueStringLength;
- }
- }
- else
- {
- /*if we don't have enough space on the output buffer
- *we copy as much as we can to it, update that pointer.
- *copy the rest in the internal buffer, and increase the
- *length marker
- */
-
- if (offsets)
- {
- int i=0;
- for (i=0;i<(targetLimit - *target);i++) offsets[i]=0;
- offsets += (targetLimit - *target);
- }
- icu_memcpy (*target, myTarget, (targetLimit - *target));
- icu_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
- myTarget + (targetLimit - *target),
- valueStringLength - (targetLimit - *target));
- _this->charErrorBufferLength += valueStringLength - (targetLimit - *target);
- *target += (targetLimit - *target);
- *err = U_INDEX_OUTOFBOUNDS_ERROR;
- }
-
- return;
- }
-
-
-
- void UCNV_TO_U_CALLBACK_SKIP (UConverter * _this,
- UChar ** target,
- const UChar * targetLimit,
- const char **source,
- const char *sourceLimit,
- int32_t *offsets,
- bool_t flush,
- UErrorCode * err)
- {
- if (CONVERSION_U_SUCCESS (*err)) return;
- *err = U_ZERO_ERROR;
- }
-
- void UCNV_TO_U_CALLBACK_SUBSTITUTE (UConverter * _this,
- UChar ** target,
- const UChar * targetLimit,
- const char **source,
- const char *sourceLimit,
- int32_t *offsets,
- bool_t flush,
- UErrorCode * err)
- {
-
- if (CONVERSION_U_SUCCESS (*err)) return;
-
- if ((targetLimit - *target) >= 1)
- {
- **target = 0xFFFD;
- (*target)++;
- if (offsets) *offsets = 0;
- *err = U_ZERO_ERROR;
- }
- else
- {
- _this->UCharErrorBuffer[_this->UCharErrorBufferLength] = 0xFFFD;
- _this->UCharErrorBufferLength++;
- *err = U_INDEX_OUTOFBOUNDS_ERROR;
- }
-
- return;
-
- }
-
- /*uses itou to get a unicode escape sequence of the offensive sequence,
- *and uses that as the substitution sequence
- */
- void UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
- UChar ** target,
- const UChar * targetLimit,
- const char **source,
- const char *sourceLimit,
- int32_t *offsets,
- bool_t flush,
- UErrorCode * err)
- {
- UChar uniValueString[VALUE_STRING_LENGTH];
- int32_t valueStringLength = 0;
- const unsigned char *mySource = (const unsigned char *) *source;
- UChar codepoint[CODEPOINT_STRING_LENGTH];
- int32_t j = 0, i = 0;
- const int32_t* offsets_end = offsets +( targetLimit - *target);
-
- if (CONVERSION_U_SUCCESS (*err)) return;
-
- codepoint[0] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
- codepoint[1] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
-
- while (i < _this->invalidCharLength)
- {
- itou (codepoint + 2, _this->invalidCharBuffer[i++], 16, 2);
- icu_memcpy (uniValueString + valueStringLength, codepoint, sizeof (UChar) * 4);
- valueStringLength += 4;
- }
-
- if ((targetLimit - *target) >= valueStringLength)
- {
- /*if we have enough space on the output buffer we just copy
- * the subchar there and update the pointer
- */
- icu_memcpy (*target, uniValueString, (sizeof (UChar)) * (valueStringLength));
- if (offsets)
- {
- for (i = 0; i < valueStringLength; i++) offsets[i] = 0;
- }
- *target += valueStringLength;
-
- *err = U_ZERO_ERROR;
- }
- else
- {
- /*if we don't have enough space on the output buffer
- *we copy as much as we can to it, update that pointer.
- *copy the rest in the internal buffer, and increase the
- *length marker
- */
- icu_memcpy (*target, uniValueString, (sizeof (UChar)) * (targetLimit - *target));
- if (offsets)
- {
- for (i = 0; i < (targetLimit - *target); i++) offsets[i] = 0;
- }
-
-
- icu_memcpy (_this->UCharErrorBuffer,
- uniValueString + (targetLimit - *target),
- (sizeof (UChar)) * (valueStringLength - (targetLimit - *target)));
- _this->UCharErrorBufferLength += valueStringLength - (targetLimit - *target);
- *target += (targetLimit - *target);
- *err = U_INDEX_OUTOFBOUNDS_ERROR;
- }
-
- return;
- }
-