home *** CD-ROM | disk | FTP | other *** search
- /*
- *******************************************************************************
- * *
- * COPYRIGHT: *
- * (C) Copyright International Business Machines Corporation, 1998 *
- * Licensed Material - Program-Property of IBM - All Rights Reserved. *
- * US Government Users Restricted Rights - Use, duplication, or disclosure *
- * restricted by GSA ADP Schedule Contract with IBM Corp. *
- * *
- *******************************************************************************
- *
- * File scsu.c
- *
- * Modification History:
- *
- * Date Name Description
- * 05/17/99 stephen Creation (ported from java UnicodeCompressor.java)
- * 09/21/99 stephen Updated to handle data splits on decompression.
- *******************************************************************************
- */
-
- #include <limits.h>
-
- #include "scsu.h"
-
- #include "cmemory.h"
-
/*
 * Constants shared by the compressor and decompressor.
 *
 * Negative sentinels are parenthesized so they stay a single operand no
 * matter what expression they are pasted into.
 */

/* Generic window shift: bytes 0x80-0xFF address the active dynamic window */
#define COMPRESSIONOFFSET 0x80

/* Indicates a window index is invalid */
#define INVALIDWINDOW (-1)

/* Indicates a character doesn't exist in input (used for look-ahead) */
#define INVALIDCHAR (-1)

/* Compression modes (values stored in the compressor's fMode field) */
#define SINGLEBYTEMODE 0
#define UNICODEMODE    1

/* Reserved index value */
#define RESERVEDINDEX 0x00

/* Indices for scripts which cross a half-block boundary */
#define LATININDEX             0xF9
#define IPAEXTENSIONINDEX      0xFA
#define GREEKINDEX             0xFB
#define ARMENIANINDEX          0xFC
#define HIRAGANAINDEX          0xFD
#define KATAKANAINDEX          0xFE
#define HALFWIDTHKATAKANAINDEX 0xFF

/* ---- Single-byte mode tags ---- */

#define SDEFINEX  0x0B
/* 0x0C is a reserved value */
#define SRESERVED 0x0C
/* quote a single Unicode character */
#define SQUOTEU   0x0E
/* switch to Unicode mode */
#define SCHANGEU  0x0F

/* SQUOTEn: quote one character from window n */
#define SQUOTE0 0x01
#define SQUOTE1 0x02
#define SQUOTE2 0x03
#define SQUOTE3 0x04
#define SQUOTE4 0x05
#define SQUOTE5 0x06
#define SQUOTE6 0x07
#define SQUOTE7 0x08

/* SCHANGEn: make dynamic window n the current window */
#define SCHANGE0 0x10
#define SCHANGE1 0x11
#define SCHANGE2 0x12
#define SCHANGE3 0x13
#define SCHANGE4 0x14
#define SCHANGE5 0x15
#define SCHANGE6 0x16
#define SCHANGE7 0x17

/* SDEFINEn: redefine dynamic window n and make it current */
#define SDEFINE0 0x18
#define SDEFINE1 0x19
#define SDEFINE2 0x1A
#define SDEFINE3 0x1B
#define SDEFINE4 0x1C
#define SDEFINE5 0x1D
#define SDEFINE6 0x1E
#define SDEFINE7 0x1F

/* ---- Unicode mode tags ---- */

/* UCHANGEn: switch to single-byte mode with dynamic window n current */
#define UCHANGE0 0xE0
#define UCHANGE1 0xE1
#define UCHANGE2 0xE2
#define UCHANGE3 0xE3
#define UCHANGE4 0xE4
#define UCHANGE5 0xE5
#define UCHANGE6 0xE6
#define UCHANGE7 0xE7

/* UDEFINEn: redefine dynamic window n and switch to single-byte mode */
#define UDEFINE0 0xE8
#define UDEFINE1 0xE9
#define UDEFINE2 0xEA
#define UDEFINE3 0xEB
#define UDEFINE4 0xEC
#define UDEFINE5 0xED
#define UDEFINE6 0xEE
#define UDEFINE7 0xEF

/* quote a character whose high byte collides with a Unicode-mode tag */
#define UQUOTEU   0xF0
#define UDEFINEX  0xF1
/* 0xF2 is a reserved value */
#define URESERVED 0xF2

-
- /* Local function prototypes */
- static int32_t scsu_makeIndex(int32_t c);
- static bool_t scsu_inDynamicWindow(const UnicodeCompressor *comp,
- int32_t c,
- int32_t whichWindow);
- static bool_t scsu_inStaticWindow(int32_t c,
- int32_t whichWindow);
- static bool_t scsu_isCompressible(int32_t c);
- static int32_t scsu_findDynamicWindow(const UnicodeCompressor *comp,
- int32_t c);
- static int32_t scsu_findStaticWindow(int32_t c);
- static int32_t scsu_getLRDefinedWindow(const UnicodeCompressor *comp);
-
- /* Static tables generated by CompressionTableGenerator */
-
/**
 * For window offset mapping: entry i is the start offset of the window
 * selected by window index i (the byte written after a define tag).
 *
 *   0x00        RESERVEDINDEX
 *   0x01..0x67  i * 0x80
 *   0x68..0xA7  0xE000 + (i - 0x68) * 0x80
 *   0xA8..0xF8  zero-filled
 *   0xF9..0xFF  the half-block-crossing scripts (LATININDEX ..
 *               HALFWIDTHKATAKANAINDEX)
 *
 * The table is read-only and therefore const.
 */
static const int32_t sOffsetTable [] = {
    /* 0x00 */ 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380,
    /* 0x08 */ 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780,
    /* 0x10 */ 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80,
    /* 0x18 */ 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80,
    /* 0x20 */ 0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380,
    /* 0x28 */ 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780,
    /* 0x30 */ 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80,
    /* 0x38 */ 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80,
    /* 0x40 */ 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380,
    /* 0x48 */ 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780,
    /* 0x50 */ 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80,
    /* 0x58 */ 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80,
    /* 0x60 */ 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3380,
    /* 0x68 */ 0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380,
    /* 0x70 */ 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780,
    /* 0x78 */ 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80,
    /* 0x80 */ 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80,
    /* 0x88 */ 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380,
    /* 0x90 */ 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780,
    /* 0x98 */ 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80,
    /* 0xA0 */ 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80,
    /* 0xA8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xB0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xB8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xC0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xC8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xD0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xD8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xE0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xE8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xF0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xF8 */ 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60
};

-
- /** For quick identification of a byte as a single-byte mode tag */
- static bool_t sSingleTagTable [] = {
- FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE,
- FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
- TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
- TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
- };
-
- /** For quick identification of a byte as a unicode mode tag */
- static bool_t sUnicodeTagTable [] = {
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
- TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
- TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE,
- FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
- FALSE
- };
-
/**
 * Static compression window offsets, indexed by static window number 0-7.
 * The table is read-only and therefore const.
 */
static const int32_t sOffsets [] = {
    0x0000,   /* for quoting single-byte mode tags */
    0x0080,   /* Latin-1 Supplement */
    0x0100,   /* Latin Extended-A */
    0x0300,   /* Combining Diacritical Marks */
    0x2000,   /* General Punctuation */
    0x2080,   /* Currency Symbols */
    0x2100,   /* Letterlike Symbols and Number Forms */
    0x3000    /* CJK Symbols and Punctuation */
};
-
-
- void
- scsu_init(UnicodeCompressor *comp)
- {
- /* initialize to defaults*/
- scsu_reset(comp);
- }
-
- void
- scsu_compress(UnicodeCompressor *comp,
- uint8_t **target,
- const uint8_t *targetLimit,
- const UChar **source,
- const UChar *sourceLimit,
- UErrorCode *status)
- {
- /* the current position in the source unichar buffer*/
- const UChar *unicharBuffer = *source;
-
- /* the current position in the target byte buffer*/
- uint8_t *byteBuffer = *target;
-
- /* the current unicode character from the source buffer*/
- int32_t curUC = INVALIDCHAR;
-
- /* the index for the current character*/
- int32_t curIndex = -1;
-
- /* look ahead*/
- int32_t nextUC = INVALIDCHAR;
- int32_t forwardUC = INVALIDCHAR;
-
- /* temporary for window searching*/
- int32_t whichWindow = 0;
-
- /* high and low bytes of the current unicode character*/
- int32_t hiByte = 0;
- int32_t loByte = 0;
-
-
- /* verify we weren't passed a failing error code */
- if(U_FAILURE(*status)) {
- return;
- }
- /* verify the target buffer can hold at least 4 bytes */
- else if(targetLimit - byteBuffer < 4) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- mainLoop:
- while( unicharBuffer < sourceLimit && byteBuffer < targetLimit) {
- switch( comp->fMode ) {
-
- /* main single byte mode compression loop*/
- case SINGLEBYTEMODE:
- while( unicharBuffer < sourceLimit && byteBuffer < targetLimit ) {
-
- /* get current char*/
- curUC = *unicharBuffer++;
-
- /* get next char*/
- if( unicharBuffer < sourceLimit )
- nextUC = *unicharBuffer;
- else
- nextUC = INVALIDCHAR;
-
- /* chars less than 0x0080 (excluding tags) go straight in
- stream */
- if( curUC < 0x0080 ) {
- loByte = curUC;
-
- /* we need to check and make sure we don't
- accidentally write a single byte mode tag to
- the stream unless it's quoted */
- if(sSingleTagTable[loByte]) {
- /* make sure there is enough room to write
- both bytes and if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 1) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- /* since we know the byte is less than 0x80, SQUOTE0
- will use static window 0, or Latin-1*/
- *byteBuffer++ = (uint8_t) SQUOTE0;
- }
-
- *byteBuffer++ = (uint8_t) loByte;
- }
-
- /* if the char belongs to current window, convert it
- to a byte by adding the generic compression offset
- and subtracting the window's offset*/
- else if(scsu_inDynamicWindow(comp,
- curUC, comp->fCurrentWindow) ) {
- *byteBuffer++ = (uint8_t)
- (curUC - comp->fOffsets[ comp->fCurrentWindow ]
- + COMPRESSIONOFFSET);
- }
-
- /* if char is not in compressible range, either switch
- to or quote from unicode*/
- else if( ! scsu_isCompressible(curUC) ) {
- /* only check next character if it is valid*/
- if(nextUC != INVALIDCHAR && scsu_isCompressible(nextUC)) {
- /* make sure there is enough room to write all
- three bytes if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 2) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- *byteBuffer++ = (uint8_t) SQUOTEU;
- *byteBuffer++ = (uint8_t) (curUC >> 8);
- *byteBuffer++ = (uint8_t) curUC;
- }
- else {
- /* make sure there is enough room to write all
- four bytes and if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 3) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- *byteBuffer++ = (uint8_t) SCHANGEU;
-
- hiByte = curUC >> 8;
- loByte = curUC;
-
- /* add quote Unicode tag */
- if( sUnicodeTagTable[hiByte] )
- *byteBuffer++ = (uint8_t) UQUOTEU;
-
- *byteBuffer++ = (uint8_t) hiByte;
- *byteBuffer++ = (uint8_t) loByte;
-
- comp->fMode = UNICODEMODE;
-
- /* use a goto here for speed, to avoid having
- to check fMode in the while loop at the top
- of the case */
- goto mainLoop;
- }
- }
-
- /* if the char is in a currently defined dynamic
- window, figure out which one, and either switch to
- it or quote from it*/
- else if( (whichWindow = scsu_findDynamicWindow(comp, curUC))
- != INVALIDWINDOW ) {
- /* look ahead*/
- if( (unicharBuffer + 1) < sourceLimit )
- forwardUC = *(unicharBuffer + 1);
- else
- forwardUC = INVALIDCHAR;
-
- /* all three chars in same window, switch to that
- window- inDynamicWindow will return FALSE for
- INVALIDCHAR*/
- if( scsu_inDynamicWindow(comp, nextUC, whichWindow)
- && scsu_inDynamicWindow(comp, forwardUC, whichWindow)){
- /* make sure there is enough room to write
- both bytes and if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 1) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- *byteBuffer++ = (uint8_t) (SCHANGE0 + whichWindow);
- *byteBuffer++ = (uint8_t)
- (curUC - comp->fOffsets[whichWindow]
- + COMPRESSIONOFFSET);
- comp->fTimeStamps [ whichWindow ] = ++(comp->fTimeStamp);
- comp->fCurrentWindow = whichWindow;
- }
-
- /* either only next char or neither in same
- window, so quote*/
- else {
- /* make sure there is enough room to write
- both bytes and if not, rewind the source stream
- and break out*/
- if( (byteBuffer + 1) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- *byteBuffer++ = (uint8_t) (SQUOTE0 + whichWindow);
- *byteBuffer++ = (uint8_t)
- (curUC - comp->fOffsets[whichWindow]
- + COMPRESSIONOFFSET);
- }
- }
-
- /* if a static window is defined, and the following
- character is not in that static window, quote from
- the static window Note: to quote from a static
- window, don't add 0x80*/
- else if( (whichWindow = scsu_findStaticWindow(curUC))
- != INVALIDWINDOW
- && ! scsu_inStaticWindow(nextUC, whichWindow) ) {
- /* make sure there is enough room to write both
- bytes if not, rewind the source stream and
- break out*/
- if( (byteBuffer + 1) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- *byteBuffer++ = (uint8_t) (SQUOTE0 + whichWindow);
- *byteBuffer++ = (uint8_t) (curUC - sOffsets[whichWindow]);
- }
-
- /* if a window is not defined, decide if we want to
- define a new one or switch to unicode mode*/
- else {
- /* determine index for current char (char is
- compressible)*/
- curIndex = scsu_makeIndex(curUC);
- comp->fIndexCount[curIndex]++;
-
- /* look ahead*/
- if( (unicharBuffer + 1) < sourceLimit )
- forwardUC = *(unicharBuffer + 1);
- else
- forwardUC = INVALIDCHAR;
-
- /* if we have encountered this index at least once
- before, define a new window*/
- if( comp->fIndexCount[curIndex] > 1 ) {
- /* make sure there is enough room to write all
- three bytes and if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 2) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- /* get least recently defined window*/
- whichWindow = scsu_getLRDefinedWindow(comp);
-
- *byteBuffer++ = (uint8_t) (SDEFINE0 + whichWindow);
- *byteBuffer++ = (uint8_t) curIndex;
- *byteBuffer++ = (uint8_t)
- (curUC - sOffsetTable[curIndex]
- + COMPRESSIONOFFSET);
-
- comp->fOffsets[whichWindow] = sOffsetTable[curIndex];
- comp->fCurrentWindow = whichWindow;
- comp->fTimeStamps [whichWindow] = ++(comp->fTimeStamp);
- }
-
- /* three chars in a row with same index, define a
- new window- makeIndex will return RESERVEDINDEX
- for INVALIDCHAR*/
- else if( curIndex == scsu_makeIndex(nextUC)
- && curIndex == scsu_makeIndex(forwardUC) ) {
- /* make sure there is enough room to write all
- three bytes if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 2) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- whichWindow = scsu_getLRDefinedWindow(comp);
-
- *byteBuffer++ = (uint8_t) (SDEFINE0 + whichWindow);
- *byteBuffer++ = (uint8_t) curIndex;
- *byteBuffer++ = (uint8_t)
- (curUC - sOffsetTable[curIndex]
- + COMPRESSIONOFFSET);
-
- comp->fOffsets[whichWindow] = sOffsetTable[curIndex];
- comp->fCurrentWindow = whichWindow;
- comp->fTimeStamps [whichWindow] = ++(comp->fTimeStamp);
- }
-
- /* only two chars in a row with same index, so
- switch to unicode mode makeIndex will return
- RESERVEDINDEX for INVALIDCHAR*/
- else if( curIndex == scsu_makeIndex(nextUC)
- && curIndex != scsu_makeIndex(forwardUC) ) {
- /* make sure there is enough room to write all
- four bytes if not, rewind the source stream
- and break out*/
- if( (byteBuffer + 3) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- *byteBuffer++ = (uint8_t) SCHANGEU;
-
- hiByte = curUC >> 8;
- loByte = curUC;
-
- /* add quote Unicode tag */
- if( sUnicodeTagTable[hiByte] )
- *byteBuffer++ = (uint8_t) UQUOTEU;
-
- *byteBuffer++ = (uint8_t) hiByte;
- *byteBuffer++ = (uint8_t) loByte;
-
- comp->fMode = UNICODEMODE;
-
- /* use a goto here for speed, to avoid having
- to check fMode in the while loop at the top
- of the case */
- goto mainLoop;
- }
-
- /* three chars have different indices, so switch
- to unicode mode*/
- else {
- /* make sure there is enough room to write all
- four bytes and if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 3) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- *byteBuffer++ = (uint8_t) SCHANGEU;
-
- hiByte = curUC >> 8;
- loByte = curUC;
-
- /* add quote Unicode tag*/
- if( sUnicodeTagTable[ hiByte ] )
- *byteBuffer++ = (uint8_t) UQUOTEU;
-
- *byteBuffer++ = (uint8_t) hiByte;
- *byteBuffer++ = (uint8_t) loByte;
-
- comp->fMode = UNICODEMODE;
-
- /* use a goto here for speed, to avoid having
- to check fMode in the while loop at the top
- of the case */
- goto mainLoop;
- }
- }
- }
- break;
-
- /* main unicode mode compression loop*/
- case UNICODEMODE:
- while(unicharBuffer < sourceLimit && byteBuffer < targetLimit) {
-
- /* get current char*/
- curUC = *unicharBuffer++;
-
- /* get next char*/
- if( unicharBuffer < sourceLimit )
- nextUC = *unicharBuffer;
- else
- nextUC = INVALIDCHAR;
-
- /* if we have two uncompressible unichars in a row,
- put the current char's bytes in the stream*/
- if( ! scsu_isCompressible(curUC)
- || (nextUC != INVALIDCHAR
- && ! scsu_isCompressible(nextUC)) ) {
- /* make sure there is enough room to write all
- three bytes and if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 2) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- hiByte = curUC >> 8;
- loByte = curUC;
-
- /* add quote Unicode tag*/
- if( sUnicodeTagTable[ hiByte ] )
- *byteBuffer++ = (uint8_t) UQUOTEU;
-
- *byteBuffer++ = (uint8_t) hiByte;
- *byteBuffer++ = (uint8_t) loByte;
- }
-
- /* bytes less than 0x80 can go straight in the stream,
- but in single-byte mode*/
- else if( curUC < 0x0080 ) {
- loByte = curUC;
-
- /* if two chars in a row below 0x80 and the
- current char is not a single-byte mode tag,
- switch to single-byte mode*/
- if(nextUC != INVALIDCHAR
- && nextUC < 0x0080 && ! sSingleTagTable[ loByte ] ) {
- /* make sure there is enough room to write
- both bytes and if not, rewind the source stream
- and break out*/
- if( (byteBuffer + 1) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- /* use window 0, but any would work*/
- *byteBuffer++ = (uint8_t) UCHANGE0;
- *byteBuffer++ = (uint8_t) loByte;
-
- comp->fCurrentWindow = 0;
- comp->fTimeStamps [0] = ++(comp->fTimeStamp);
- comp->fMode = SINGLEBYTEMODE;
-
- /* use a goto here for speed, to avoid having
- to check fMode in the while loop at the top
- of the case */
- goto mainLoop;
- }
-
- /* otherwise, just write the bytes to the stream
- (this will cover the case of only 1 char less
- than 0x80 and single-byte mode tags)*/
- else {
- /* make sure there is enough room to write
- both bytes and if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 1) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- /* since the character is less than 0x80, the
- high byte is always 0x00 - no need for
- (curUC >> 8)*/
- *byteBuffer++ = (uint8_t) 0x00;
- *byteBuffer++ = (uint8_t) loByte;
- }
- }
-
- /* figure out if the current unichar is in a defined
- window*/
- else if( (whichWindow = scsu_findDynamicWindow(comp, curUC))
- != INVALIDWINDOW ) {
- /* if two chars in a row in the same window,
- switch to that window and go to single-byte
- mode inDynamicWindow will return FALSE for
- INVALIDCHAR*/
- if( scsu_inDynamicWindow(comp, nextUC, whichWindow) ) {
- /* make sure there is enough room to write
- both bytes if not, rewind the source stream
- and break out*/
- if( (byteBuffer + 1) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- *byteBuffer++ = (uint8_t) (UCHANGE0 + whichWindow);
- *byteBuffer++ = (uint8_t)
- (curUC - comp->fOffsets[whichWindow]
- + COMPRESSIONOFFSET);
-
- comp->fTimeStamps[whichWindow] = ++(comp->fTimeStamp);
- comp->fCurrentWindow = whichWindow;
- comp->fMode = SINGLEBYTEMODE;
-
- /* use a goto here for speed, to avoid having
- to check fMode in the while loop at the top
- of the case */
- goto mainLoop;
- }
-
- /* otherwise, just quote the unicode for the
- char*/
- else {
- /* make sure there is enough room to write all
- three bytes and if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 2) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- hiByte = curUC >> 8;
- loByte = curUC;
-
- /* add quote Unicode tag*/
- if( sUnicodeTagTable[ hiByte ] )
- *byteBuffer++ = (uint8_t) UQUOTEU;
-
- *byteBuffer++ = (uint8_t) hiByte;
- *byteBuffer++ = (uint8_t) loByte;
- }
- }
-
- /* char is not in a defined window*/
- else {
- /* determine index for current char (char is
- compressible)*/
- curIndex = scsu_makeIndex(curUC);
- comp->fIndexCount[curIndex]++;
-
- /* look ahead*/
- if( (unicharBuffer + 1) < sourceLimit )
- forwardUC = *unicharBuffer;
- else
- forwardUC = INVALIDCHAR;
-
- /* if we have encountered this index at least once
- before, define a new window for it that hasn't
- previously been redefined*/
- if( comp->fIndexCount[curIndex] > 1 ) {
- /* make sure there is enough room to write all
- three bytes if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 2) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- /* get least recently defined window*/
- whichWindow = scsu_getLRDefinedWindow(comp);
-
- *byteBuffer++ = (uint8_t) (UDEFINE0 + whichWindow);
- *byteBuffer++ = (uint8_t) curIndex;
- *byteBuffer++ = (uint8_t)
- (curUC - sOffsetTable[curIndex]
- + COMPRESSIONOFFSET);
-
- comp->fOffsets[whichWindow] = sOffsetTable[curIndex];
- comp->fCurrentWindow = whichWindow;
- comp->fTimeStamps[whichWindow] = ++(comp->fTimeStamp);
- comp->fMode = SINGLEBYTEMODE;
-
- /* use a goto here for speed, to avoid having
- to check fMode in the while loop at the top
- of the case */
- goto mainLoop;
- }
-
- /* if three chars in a row with the same index,
- define a new window makeIndex will return
- RESERVEDINDEX for INVALIDCHAR*/
- else if( curIndex == scsu_makeIndex(nextUC)
- && curIndex == scsu_makeIndex(forwardUC) ) {
- /* make sure there is enough room to write all
- three bytes if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 2) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- whichWindow = scsu_getLRDefinedWindow(comp);
-
- *byteBuffer++ = (uint8_t) (UDEFINE0 + whichWindow);
- *byteBuffer++ = (uint8_t) curIndex;
- *byteBuffer++ = (uint8_t)
- (curUC - sOffsetTable[curIndex]
- + COMPRESSIONOFFSET);
-
- comp->fOffsets[whichWindow] = sOffsetTable[curIndex];
- comp->fCurrentWindow = whichWindow;
- comp->fTimeStamps[whichWindow] = ++(comp->fTimeStamp);
- comp->fMode = SINGLEBYTEMODE;
-
- /* use a goto here for speed, to avoid having
- to check fMode in the while loop at the top
- of the case */
- goto mainLoop;
- }
-
- /* otherwise just quote the unicode, and save our
- windows for longer runs*/
- else {
- /* make sure there is enough room to write all
- three bytes and if not, rewind the source
- stream and break out*/
- if( (byteBuffer + 2) >= targetLimit) {
- --unicharBuffer;
- goto finish;
- }
-
- hiByte = curUC >> 8;
- loByte = curUC;
-
- /* add quote Unicode tag*/
- if( sUnicodeTagTable[ hiByte ] )
- *byteBuffer++ = (uint8_t) UQUOTEU;
-
- *byteBuffer++ = (uint8_t) hiByte;
- *byteBuffer++ = (uint8_t) loByte;
- }
- }
- }
- } /* end switch*/
- }
-
- finish:
-
- /* fill in output parameters*/
- *target = byteBuffer;
- *source = unicharBuffer;
-
- if(unicharBuffer < sourceLimit)
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- }
-
- void
- scsu_decompress(UnicodeCompressor *comp,
- UChar **target,
- const UChar *targetLimit,
- const uint8_t **source,
- const uint8_t *sourceLimit,
- UErrorCode *status)
- {
- /* the current position in the source byte buffer*/
- const uint8_t *byteBuffer = *source;
-
- /* the current position in the target unichar buffer*/
- UChar *unicharBuffer = *target;
-
- /* the current byte from the source buffer*/
- int32_t aByte = 0x00;
-
- /* temporary for calculating surrogate pairs */
- int32_t normalizedBase;
-
- /* temporary used for look-ahead */
- int32_t dByte;
-
-
- /* verify we weren't passed a failing error code */
- if(U_FAILURE(*status)) {
- return;
- }
- /* verify the target buffer can hold at least 1 UChar */
- else if(targetLimit - unicharBuffer < sizeof(UChar)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- /* if our internal buffer isn't empty, flush its contents
- to the output buffer before doing any more decompression */
- if(comp->fBufferLength > 0) {
-
- int32_t newBytes = 0;
- const uint8_t *newSource = comp->fBuffer;
- const uint8_t *newSourceLimit = comp->fBuffer + USCSU_BUFSIZE;
-
- /* fill the buffer completely, to guarantee one full character */
- if(comp->fBufferLength != USCSU_BUFSIZE) {
- newBytes = USCSU_BUFSIZE - comp->fBufferLength;
-
- /* verify there are newBytes bytes in byteBuffer */
- if(sourceLimit - byteBuffer < newBytes)
- newBytes = sourceLimit - byteBuffer;
-
- icu_memcpy(comp->fBuffer + comp->fBufferLength, byteBuffer, newBytes);
- }
-
- /* reset buffer length to 0 before recursive call */
- comp->fBufferLength = 0;
-
- /* call self recursively to decompress the buffer */
- scsu_decompress(comp, &unicharBuffer, targetLimit,
- &newSource, newSourceLimit, status);
-
- /* update the positions into the arrays */
- /* unicharBuffer was updated by the call to decompress above */
- byteBuffer += newBytes;
- }
-
- /* the main decompression loop*/
- mainLoop:
- while(byteBuffer < sourceLimit && unicharBuffer < targetLimit) {
-
- switch(comp->fMode) {
-
- /* single-byte mode decompression loop*/
- case SINGLEBYTEMODE:
- while(byteBuffer < sourceLimit && unicharBuffer < targetLimit) {
-
- /* get the next byte */
- aByte = *byteBuffer++;
-
- switch(aByte) {
- /* All bytes from 0x80 through 0xFF are remapped to
- chars or surrogate pairs according to the currently
- active window */
- case 0x80: case 0x81: case 0x82: case 0x83: case 0x84:
- case 0x85: case 0x86: case 0x87: case 0x88: case 0x89:
- case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E:
- case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93:
- case 0x94: case 0x95: case 0x96: case 0x97: case 0x98:
- case 0x99: case 0x9A: case 0x9B: case 0x9C: case 0x9D:
- case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2:
- case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7:
- case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC:
- case 0xAD: case 0xAE: case 0xAF: case 0xB0: case 0xB1:
- case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6:
- case 0xB7: case 0xB8: case 0xB9: case 0xBA: case 0xBB:
- case 0xBC: case 0xBD: case 0xBE: case 0xBF: case 0xC0:
- case 0xC1: case 0xC2: case 0xC3: case 0xC4: case 0xC5:
- case 0xC6: case 0xC7: case 0xC8: case 0xC9: case 0xCA:
- case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF:
- case 0xD0: case 0xD1: case 0xD2: case 0xD3: case 0xD4:
- case 0xD5: case 0xD6: case 0xD7: case 0xD8: case 0xD9:
- case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE:
- case 0xDF: case 0xE0: case 0xE1: case 0xE2: case 0xE3:
- case 0xE4: case 0xE5: case 0xE6: case 0xE7: case 0xE8:
- case 0xE9: case 0xEA: case 0xEB: case 0xEC: case 0xED:
- case 0xEE: case 0xEF: case 0xF0: case 0xF1: case 0xF2:
- case 0xF3: case 0xF4: case 0xF5: case 0xF6: case 0xF7:
- case 0xF8: case 0xF9: case 0xFA: case 0xFB: case 0xFC:
- case 0xFD: case 0xFE: case 0xFF:
-
- /* For offsets <= 0xFFFF, convert to a single char by
- adding the window's offset and subtracting the
- generic compression offset*/
- if(comp->fOffsets[ comp->fCurrentWindow ] <= 0xFFFF) {
- *unicharBuffer++ = (UChar)
- (aByte + comp->fOffsets[comp->fCurrentWindow]
- - COMPRESSIONOFFSET);
- }
- /* For offsets >= 0x10000 (i.e. above 0xFFFF), convert to a
- surrogate pair by
- normBase = window's offset - 0x10000
- high surrogate = 0xD800 + (normBase >> 10)
- low surrogate = 0xDC00 + (normBase & 0x3FF)
- + (byte & 0x7F) */
- else {
- /* make sure there is enough room to write
- both characters
- if not, save state and break out */
- if((unicharBuffer + 1) >= targetLimit) {
- --byteBuffer;
- icu_memcpy(comp->fBuffer, byteBuffer,
- sourceLimit - byteBuffer);
- comp->fBufferLength = sourceLimit - byteBuffer;
- byteBuffer += comp->fBufferLength;
- goto finish;
- }
-
- normalizedBase = comp->fOffsets[comp->fCurrentWindow]
- - 0x10000;
- *unicharBuffer++ =
- (UChar) (0xD800 + (normalizedBase >> 10));
- *unicharBuffer++ = (UChar)
- (0xDC00 + (normalizedBase & 0x3FF)
- + (aByte & 0x7F));
- }
- break;
-
- /* bytes from 0x20 through 0x7F are treated as ASCII
- and are remapped to chars by padding the high byte
- (this is the same as quoting from static window 0)
- NUL (0x00), HT (0x09), LF (0x0A), CR (0x0D) are
- treated as ASCII as well*/
- case 0x00: case 0x09: case 0x0A: case 0x0D:
- case 0x20: case 0x21: case 0x22: case 0x23: case 0x24:
- case 0x25: case 0x26: case 0x27: case 0x28: case 0x29:
- case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E:
- case 0x2F: case 0x30: case 0x31: case 0x32: case 0x33:
- case 0x34: case 0x35: case 0x36: case 0x37: case 0x38:
- case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D:
- case 0x3E: case 0x3F: case 0x40: case 0x41: case 0x42:
- case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
- case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C:
- case 0x4D: case 0x4E: case 0x4F: case 0x50: case 0x51:
- case 0x52: case 0x53: case 0x54: case 0x55: case 0x56:
- case 0x57: case 0x58: case 0x59: case 0x5A: case 0x5B:
- case 0x5C: case 0x5D: case 0x5E: case 0x5F: case 0x60:
- case 0x61: case 0x62: case 0x63: case 0x64: case 0x65:
- case 0x66: case 0x67: case 0x68: case 0x69: case 0x6A:
- case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
- case 0x70: case 0x71: case 0x72: case 0x73: case 0x74:
- case 0x75: case 0x76: case 0x77: case 0x78: case 0x79:
- case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E:
- case 0x7F:
- *unicharBuffer++ = (UChar) aByte;
- break;
-
- /* quote unicode*/
- case SQUOTEU:
- /* verify we have two bytes following tag and if not,
- rewind the source stream and break out */
- if( (byteBuffer + 1) >= sourceLimit ) {
- --byteBuffer;
- icu_memcpy(comp->fBuffer, byteBuffer,
- sourceLimit - byteBuffer);
- comp->fBufferLength = sourceLimit - byteBuffer;
- byteBuffer += comp->fBufferLength;
- goto finish;
- }
-
- aByte = *byteBuffer++;
- *unicharBuffer++ =
- (UChar) (aByte << 8 | *byteBuffer++);
- break;
-
- /* switch to Unicode mode*/
- case SCHANGEU:
- comp->fMode = UNICODEMODE;
- /* use a goto here for speed, to avoid having to check
- fMode in the while loop at the top of the case */
- goto mainLoop;
- break;
-
- /* handle all quote tags*/
- case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3:
- case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7:
- /* verify there is a byte following the tag and if
- not, rewind the source stream and break out*/
- if( byteBuffer >= sourceLimit ) {
- --byteBuffer;
- icu_memcpy(comp->fBuffer, byteBuffer,
- sourceLimit - byteBuffer);
- comp->fBufferLength = sourceLimit - byteBuffer;
- byteBuffer += comp->fBufferLength;
- goto finish;
- }
-
- /* if the byte is in the range 0x00 - 0x7F, use static
- window n- otherwise, use dynamic window n */
- dByte = *byteBuffer++;
- *unicharBuffer++ = (UChar)
- (dByte + (dByte >= 0x00 && dByte < 0x80
- ? sOffsets[aByte - SQUOTE0]
- : (comp->fOffsets[aByte - SQUOTE0]
- - COMPRESSIONOFFSET)));
- break;
-
- /* handle all change tags*/
- case SCHANGE0: case SCHANGE1: case SCHANGE2: case SCHANGE3:
- case SCHANGE4: case SCHANGE5: case SCHANGE6: case SCHANGE7:
- comp->fCurrentWindow = (aByte - SCHANGE0);
- break;
-
- /* handle all define tags*/
- case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3:
- case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7:
- /* verify there is a byte following the tag and if
- not, rewind the source stream and break out*/
- if( byteBuffer >= sourceLimit ) {
- --byteBuffer;
- icu_memcpy(comp->fBuffer, byteBuffer,
- sourceLimit - byteBuffer);
- comp->fBufferLength = sourceLimit - byteBuffer;
- byteBuffer += comp->fBufferLength;
- goto finish;
- }
-
- comp->fCurrentWindow = (aByte - SDEFINE0);
- comp->fOffsets[comp->fCurrentWindow] =
- sOffsetTable[*byteBuffer++];
- break;
-
- /* handle define extended tag*/
- case SDEFINEX:
- /* verify we have two bytes following tag and if not,
- rewind the source stream and break out*/
- if( (byteBuffer + 1) >= sourceLimit ) {
- --byteBuffer;
- icu_memcpy(comp->fBuffer, byteBuffer,
- sourceLimit - byteBuffer);
- comp->fBufferLength = sourceLimit - byteBuffer;
- byteBuffer += comp->fBufferLength;
- goto finish;
- }
-
- aByte = *byteBuffer++;
- comp->fCurrentWindow = (aByte & 0xE0) >> 5;
- comp->fOffsets[comp->fCurrentWindow] = 0x10000
- + (0x80
- * (((aByte & 0x1F) << 8) | *byteBuffer++));
- break;
-
- /* reserved, shouldn't happen*/
- case SRESERVED:
- break;
-
- } /* end switch*/
- } /* end while*/
- break;
-
- /* unicode mode decompression loop*/
- case UNICODEMODE:
- while( byteBuffer < sourceLimit && unicharBuffer < targetLimit ) {
-
- /* get the next byte */
- aByte = *byteBuffer++;
-
- switch( aByte ) {
- /* handle all define tags*/
- case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3:
- case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7:
- /* verify there is a byte following tag and if not,
- rewind the source stream and break out*/
- if( byteBuffer >= sourceLimit ) {
- --byteBuffer;
- icu_memcpy(comp->fBuffer, byteBuffer,
- sourceLimit - byteBuffer);
- comp->fBufferLength = sourceLimit - byteBuffer;
- byteBuffer += comp->fBufferLength;
- goto finish;
- }
-
- comp->fCurrentWindow = (aByte - UDEFINE0);
- comp->fOffsets[comp->fCurrentWindow] =
- sOffsetTable[*byteBuffer++];
- comp->fMode = SINGLEBYTEMODE;
- /* use a goto here for speed, to avoid having to check
- fMode in the while loop at the top of the case */
- goto mainLoop;
- break;
-
- /* handle define extended tag*/
- case UDEFINEX:
- /* verify we have two bytes following tag if not,
- rewind the source stream and break out*/
- if( (byteBuffer + 1) >= sourceLimit ) {
- --byteBuffer;
- icu_memcpy(comp->fBuffer, byteBuffer,
- sourceLimit - byteBuffer);
- comp->fBufferLength = sourceLimit - byteBuffer;
- byteBuffer += comp->fBufferLength;
- goto finish;
- }
-
- aByte = *byteBuffer++;
- comp->fCurrentWindow = (aByte & 0xE0) >> 5;
- comp->fOffsets[comp->fCurrentWindow] = 0x10000
- + (0x80
- * (((aByte & 0x1F) << 8) | *byteBuffer++));
- comp->fMode = SINGLEBYTEMODE;
- /* use a goto here for speed, to avoid having to check
- fMode in the while loop at the top of the case */
- goto mainLoop;
- break;
-
- /* handle all change tags*/
- case UCHANGE0: case UCHANGE1: case UCHANGE2: case UCHANGE3:
- case UCHANGE4: case UCHANGE5: case UCHANGE6: case UCHANGE7:
- comp->fCurrentWindow = (aByte - UCHANGE0);
- comp->fMode = SINGLEBYTEMODE;
- /* use a goto here for speed, to avoid having to check
- fMode in the while loop at the top of the case */
- goto mainLoop;
- break;
-
- /* quote unicode*/
- case UQUOTEU:
- /* verify we have two bytes following tag if not,
- rewind the source stream and break out*/
- if( byteBuffer >= sourceLimit - 1) {
- --byteBuffer;
- icu_memcpy(comp->fBuffer, byteBuffer,
- sourceLimit - byteBuffer);
- comp->fBufferLength = sourceLimit - byteBuffer;
- byteBuffer += comp->fBufferLength;
- goto finish;
- }
-
- aByte = *byteBuffer++;
- *unicharBuffer++ = (UChar)
- (aByte << 8 | *byteBuffer++);
- break;
-
- default:
- /* verify there is a byte following tag if not, rewind
- the source stream and break out*/
- if( byteBuffer >= sourceLimit ) {
- --byteBuffer;
- icu_memcpy(comp->fBuffer, byteBuffer,
- sourceLimit - byteBuffer);
- comp->fBufferLength = sourceLimit - byteBuffer;
- byteBuffer += comp->fBufferLength;
- goto finish;
- }
-
- *unicharBuffer++ = (UChar) (aByte << 8 | *byteBuffer++);
- break;
-
- } /* end switch*/
- } /* end while*/
- break;
-
- } /* end switch( comp->fMode )*/
- } /* end while*/
-
-
- finish:
-
- /* fill in return values*/
- *target = unicharBuffer;
- *source = byteBuffer;
-
- if(byteBuffer < sourceLimit)
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- }
-
- /** Reset the compressor to its initial state. */
- void
- scsu_reset(UnicodeCompressor *comp)
- {
- int32_t i;
-
- /* reset dynamic windows*/
- comp->fOffsets[0] = 0x0080; /* Latin-1*/
- comp->fOffsets[1] = 0x00C0; /* Latin-1 Supplement + Latin Extended-A*/
- comp->fOffsets[2] = 0x0400; /* Cyrillic*/
- comp->fOffsets[3] = 0x0600; /* Arabic*/
- comp->fOffsets[4] = 0x0900; /* Devanagari*/
- comp->fOffsets[5] = 0x3040; /* Hiragana*/
- comp->fOffsets[6] = 0x30A0; /* Katakana*/
- comp->fOffsets[7] = 0xFF00; /* Fullwidth ASCII*/
-
- /* reset time stamps*/
- for(i = 0; i < USCSU_NUM_WINDOWS; i++) {
- comp->fTimeStamps[i] = 0;
- }
-
- /* reset count of seen indices*/
- for( i = 0; i <= USCSU_MAX_INDEX; i++ ) {
- comp->fIndexCount[i] = 0;
- }
-
- comp->fTimeStamp = 0; /* Reset current time stamp*/
- comp->fCurrentWindow = 0; /* Make current window Latin-1*/
- comp->fMode = SINGLEBYTEMODE; /* Start in single-byte mode*/
- comp->fBufferLength = 0; /* Empty buffer */
- }
-
- /**
- * Create the index value for a character.
- * For more information on this function, refer to table X-3
- * <A HREF="http://www.unicode.org/unicode/reports/tr6">UTR6</A>.
- * @param c The character in question.
- * @return An index for c
- */
- static int32_t
- scsu_makeIndex(int32_t c)
- {
- /* check the predefined indices*/
- if( c >= 0x00C0 && c < 0x0140)
- return LATININDEX;
- else if( c >= 0x0250 && c < 0x02D0 )
- return IPAEXTENSIONINDEX;
- else if( c >= 0x0370 && c < 0x03F0 )
- return GREEKINDEX;
- else if( c >= 0x0530 && c < 0x0590 )
- return ARMENIANINDEX;
- else if( c >= 0x3040 && c < 0x30A0 )
- return HIRAGANAINDEX;
- else if( c >= 0x30A0 && c < 0x3120)
- return KATAKANAINDEX;
- else if( c >= 0xFF60 && c < 0xFF9F )
- return HALFWIDTHKATAKANAINDEX;
-
- /* calculate index*/
- else if( c >= 0x0080 && c < 0x3400 )
- return (c / 0x80) & 0xFF;
- else if( c >= 0xE000 && c <= 0xFFFF )
- return ((c - 0xAC00) / 0x80) & 0xFF;
-
- /* should never happen*/
- else {
- return RESERVEDINDEX;
- }
- }
-
- /**
- * Determine if a character is in a dynamic window.
- * @param c The character to test
- * @param whichWindow The dynamic window the test
- * @return TRUE if <TT>c</TT> will fit in <TT>whichWindow</TT>, FALSE
- * otherwise.
- */
- static bool_t
- scsu_inDynamicWindow(const UnicodeCompressor *comp,
- int32_t c,
- int32_t whichWindow)
- {
- return (c >= comp->fOffsets[whichWindow]
- && c < (comp->fOffsets[whichWindow] + 0x80));
- }
-
- /**
- * Determine if a character is in a static window.
- * @param c The character to test
- * @param whichWindow The static window the test
- * @return TRUE if <TT>c</TT> will fit in <TT>whichWindow</TT>, FALSE
- * otherwise.
- */
- static bool_t
- scsu_inStaticWindow(int32_t c,
- int32_t whichWindow)
- {
- return (c >= sOffsets[whichWindow] && c < (sOffsets[whichWindow] + 0x80));
- }
-
- /**
- * Determine if a character is compressible.
- * @param c The character to test.
- * @return TRUE if the <TT>c</TT> is compressible, FALSE otherwise.
- */
- static bool_t
- scsu_isCompressible(int32_t c)
- {
- return (c < 0x3400 || c >= 0xE000);
- }
-
- /**
- * Determine if a dynamic window for a certain character is defined
- * @param c The character in question
- * @return The dynamic window containing <TT>c</TT>, or INVALIDWINDOW if
- * not defined.
- */
- static int32_t
- scsu_findDynamicWindow(const UnicodeCompressor *comp,
- int32_t c)
- {
- int32_t i;
-
- for(i = 0; i < USCSU_NUM_WINDOWS; i++) {
- if(scsu_inDynamicWindow(comp, c, i)) {
- return i;
- }
- }
-
- return INVALIDWINDOW;
- }
-
- /**
- * Determine if a static window for a certain character is defined
- * @param c The character in question
- * @return The static window containing <TT>c</TT>, or INVALIDWINDOW if
- * not defined.
- */
- static int32_t
- scsu_findStaticWindow(int32_t c)
- {
- int32_t i;
-
- for(i = 0; i < USCSU_NUM_STATIC_WINDOWS; i++) {
- if(scsu_inStaticWindow(c, i)) {
- return i;
- }
- }
-
- return INVALIDWINDOW;
- }
-
- /** Find the least-recently defined window */
- static int32_t
- scsu_getLRDefinedWindow(const UnicodeCompressor *comp)
- {
- int32_t leastRU = LONG_MAX;
- int32_t whichWindow = INVALIDWINDOW;
- int32_t i;
-
- /* find least recently used window*/
- for(i = 0; i < USCSU_NUM_WINDOWS; i++ ) {
- if(comp->fTimeStamps[i] < leastRU) {
- leastRU = comp->fTimeStamps[i];
- whichWindow = i;
- }
- }
-
- return whichWindow;
- }
-