home *** CD-ROM | disk | FTP | other *** search
/ AmigActive 6 / AACD06.ISO / AACD / Programming / ICU / src / icuapps / locexp / util / decompcb.c next >
Encoding:
C/C++ Source or Header  |  1999-10-22  |  6.9 KB  |  310 lines

  1. #include "stdio.h"
  2. #include "stddef.h"
  3.  
  4. /* Largest decomposition sequence is 18 chars at U+FDFA */
  5. #define DECOMP_MAX 32
  6.  
  7. #include "utypes.h"
  8. #include "ustring.h"
  9.  
  10. #include "ucnv.h"
  11. #include "ucnv_bld.h"
  12. #include "udat.h"
  13. #include "ucal.h"
  14. #include "uchar.h"
  15.  
  16. #include "ucol.h"
  17.  
  18. #include "decompcb.h"
  19.  
  20. UConverterFromUCallback DECOMPOSE_lastResortCallback = UCNV_FROM_U_CALLBACK_SUBSTITUTE;
  21.  
  22. static UChar block0300Subs[] =
  23.     { 0x0060, 0x00b4, 0x005e, 0x007e, 0x007e, 0x00af, 0x0306, 0x0307, 0x00a8, 0x0309, 0x00b0, 0x0022, 0x030c, 0x0022 }; 
  24.  
  25.  
  26. static UChar block0390Subs[] = 
  27.   0x0390,
  28.   0x0041,
  29.   0x0042,
  30.   0x0047,
  31.   0x0044,
  32.   0x0045,
  33.   0x005A,
  34.   0x0048,
  35.   0x0398,
  36.   0x0049
  37. };
  38.  
  39. #ifdef KXI
  40. #include "../dev/kxitbl.c"
  41. #endif
  42.  
  43. /* clone the converter, reset it, and then try to transcode the source into the
  44.    target. If it fails, then transcode into the error buffer. 
  45.  
  46.    source isn't modified because this fcn is expected to deal with all of it.
  47.    
  48.    This would be a very useful function for other callbacks.
  49. */
  50. static void convertIntoTargetOrErrChars(UConverter *_this,
  51.                     char **target,
  52.                     const char *targetLimit,
  53.                     const UChar *source,
  54.                     const UChar *sourceLimit,
  55.                     UErrorCode *err)
  56. {
  57.   const UChar      *sourceAlias = source;
  58.   UErrorCode subErr = U_ZERO_ERROR;   
  59.   char       *myTarget;
  60.   UConverter myConverter = *_this; /* bitwise copy */
  61.  
  62.   ucnv_reset(&myConverter); /* necessary???? */
  63.  
  64.   /*  ucnv_setFromUCallBack (&myConverter,               <-- unneeded
  65.              (UConverterFromUCallback)  UCNV_FROM_U_CALLBACK_DECOMPOSE,
  66.              &err2);*/
  67.   
  68.   ucnv_fromUnicode (&myConverter,
  69.             target,
  70.             targetLimit,
  71.             &sourceAlias,
  72.             sourceLimit,
  73.             NULL,
  74.             TRUE,
  75.             &subErr); /* pass them the real error. */
  76.   
  77.   if(subErr == U_INDEX_OUTOFBOUNDS_ERROR)
  78.     {
  79.       /* it didn't fit. */
  80.       subErr = U_ZERO_ERROR;
  81.  
  82.       myTarget = _this->charErrorBuffer + _this->charErrorBufferLength;
  83.  
  84.       /* OK hit it */
  85.       ucnv_fromUnicode(&myConverter,
  86.                &myTarget,
  87.                _this->charErrorBuffer + UCNV_ERROR_BUFFER_LENGTH,
  88.                &sourceAlias,
  89.                sourceLimit,
  90.                NULL,
  91.                TRUE,
  92.                &subErr);
  93.       /* fix the charBufferLength */
  94.  
  95.       /* **todo: check err here! */
  96.       _this->charErrorBufferLength = ((unsigned char *)myTarget - _this->charErrorBuffer);
  97.  
  98.       *err = U_INDEX_OUTOFBOUNDS_ERROR;
  99.     }
  100. }
  101.  
  102.  
  103.  
  104.  
  105.  
  106. /* called for each char */
  107. static void DECOMPOSE_uchar(UConverter * _this,
  108.                 const UChar **source,
  109.                 const UChar *sourceLimit,
  110.           char **target,
  111.           const char *targetLimit,
  112.           UChar theChar,
  113.           UErrorCode * err)
  114. {
  115.   UChar      decomposedSequence[DECOMP_MAX];
  116.   const UChar     *tempSource, *output;
  117.   int32_t    decomposedLen;
  118.   UErrorCode err2 = U_ZERO_ERROR;
  119.  
  120.   bool_t     changedSomething = FALSE;  /* have we had *any* effect here? 
  121.                        Used to exit when this fcn isn't doing
  122.                        any good. */
  123.  
  124.   
  125.   tempSource = &theChar;
  126.  
  127.   /* First, attempt a decompose */
  128.   decomposedLen = u_normalize(&theChar,
  129.                   1,
  130.                   UCOL_DECOMP_COMPAT,
  131.                   0,
  132.                   decomposedSequence,
  133.                   DECOMP_MAX,
  134.                   &err2);
  135.  
  136.   if( (decomposedLen != 1)  ||
  137.       u_strncmp(&theChar, decomposedSequence, 1) )
  138.     {
  139.       *err = U_USING_FALLBACK_ERROR;
  140.  
  141.       /* Since we changed something, we'll try doing another conversion.
  142.          Guess what callback we use! */
  143.  
  144.       /* Q:: In fact, do we even need a new converter here? would it mess things up here to
  145.      simply use _this as the converter?? the state would be kinda-ok then. 
  146.      Need opinions! 
  147.       */
  148.       
  149.       convertIntoTargetOrErrChars(_this, 
  150.                   target,
  151.                   targetLimit,
  152.                   decomposedSequence,
  153.                   decomposedSequence+decomposedLen,
  154.                   err);
  155.  
  156.       return;
  157.  
  158.     }
  159.  
  160.  
  161.   /* Try other substitutions */
  162.   decomposedSequence[0] = theChar;
  163.   decomposedSequence[1] = 0;
  164.   output = decomposedSequence;
  165.  
  166.  
  167.   /* the following switch statement can either:  
  168.      1). change 'output' to a totally different string,
  169.      2). modify the string in 'decomposedSequence', OR
  170.      3). do nothing, in which case the SUBSTITUTE callback will be called.
  171.   */
  172.   
  173.   switch(theChar & 0xFF00)
  174.     {
  175.     case 0x0000:
  176.       switch(theChar)
  177.     {
  178.     case 0x00E6:
  179.       decomposedSequence[0] = 'a'; /* ASCIISM */
  180.       decomposedSequence[1] = 'e'; /* ASCIISM */
  181.       decomposedSequence[2] = 0;
  182.       break;
  183.     }
  184.       break;
  185.  
  186.     case 0x0300: /* combining diacriticals ------------------ 0300 */
  187.       if(theChar < (0x0300 + (sizeof(block0300Subs)/sizeof(UChar))))
  188.     {
  189.       decomposedSequence[0] = block0300Subs[theChar & 0x00FF];
  190.     }
  191. #if 0
  192.       else if( (theChar >= 0x0391) && (theChar <= (0x0390 - 1 + (sizeof(block0390Subs)/sizeof(UChar)))))
  193.     { /* greek xliteration */
  194.       decomposedSequence[0] = block0390Subs[theChar - 0x0390];
  195.     }
  196. #endif
  197.       break;
  198.       
  199.     case 0x2000:
  200.       switch(theChar)
  201.     {
  202.     case 0x2044:
  203.       decomposedSequence[0] = '/'; /* ASCIISM */
  204.       break;
  205.     }
  206.       break;
  207.       
  208. #ifdef KXI
  209.     case 0x2F00: /* radical forms */
  210.       if(theChar < 0x2FD6 )
  211.     {
  212.       decomposedSequence[0] = 0xFE43;
  213.       decomposedSequence[1] = kxitbl[((theChar & 0x00FF)*2) + 1];
  214.       decomposedSequence[2] = 0xFE44;
  215.       decomposedSequence[3] = 0x0000;
  216.     }
  217.       break;
  218. #endif
  219.     }
  220.  
  221.   if((output != decomposedSequence) ||       /* if it's a different ptr */
  222.      ( (decomposedSequence[0] != theChar) || /* if the decomposedSeq changed*/
  223.        (decomposedSequence[1] != 0) ) )
  224.     {
  225.       /* Yes! We have something different. Put it out.. */
  226.       *err = U_USING_FALLBACK_ERROR;
  227.       
  228.       convertIntoTargetOrErrChars(_this, 
  229.                   target,
  230.                   targetLimit,
  231.                   output,
  232.                   output+u_strlen(output),
  233.                   err);
  234.       return;
  235.     }
  236.  
  237.  
  238.   /* nothing WE can do .. */
  239.   
  240.   (*DECOMPOSE_lastResortCallback)(_this,
  241.                   target,
  242.                   targetLimit,
  243.                   source, /* source shouldn't be needed */
  244.                   sourceLimit, /* sourcelimit doesn't matter */
  245.                   0, /* offsets, */
  246.                   TRUE, /* flush, */
  247.                   err);
  248.  
  249. }
  250.  
  251. U_CAPI void 
  252.   UCNV_FROM_U_CALLBACK_DECOMPOSE (UConverter * _this,
  253.                         char **target,
  254.                         const char *targetLimit,
  255.                         const UChar ** source,
  256.                         const UChar * sourceLimit,
  257.                         int32_t *offsets,
  258.                         bool_t flush,
  259.                         UErrorCode * err)
  260. {
  261.   int32_t i;
  262.   char   *oldTarget;
  263.  
  264.   if (CONVERSION_U_SUCCESS (*err))
  265.     return;
  266.  
  267.   for(i=0;i<_this->invalidUCharLength;i++)
  268.     {
  269.       oldTarget = *target;
  270.  
  271.       /* 
  272.      TODO: [optimization]
  273.      if(target == targetLimit)
  274.         theirTarget = _this->charErrorBuffer;
  275.      etc.
  276.  
  277.      Then, after calling the fcn, update the charErrorLen.
  278.  
  279.      This way, the subfunc won't have to copy back into the charErrorLen
  280.       */
  281.  
  282.       DECOMPOSE_uchar(_this,
  283.               source,
  284.               sourceLimit,
  285.               target,
  286.               targetLimit,
  287.               _this->invalidUCharBuffer[i],
  288.               err);
  289.  
  290.       /* TODO:
  291.      o  update offsets[0..(target-oldTarget)]
  292.      o  offsets += [target-oldTarget]
  293.  
  294.      thank you. 
  295.         --american P
  296.       */
  297.     }
  298.  
  299.  
  300.   return;
  301. }
  302. /*******************************************************end of borrowed code from ucnv_err.c **/
  303.  
  304.  
  305.  
  306.  
  307.  
  308.  
  309.