home *** CD-ROM | disk | FTP | other *** search
- /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- *
- * The contents of this file are subject to the Netscape Public License
- * Version 1.0 (the "NPL"); you may not use this file except in
- * compliance with the NPL. You may obtain a copy of the NPL at
- * http://www.mozilla.org/NPL/
- *
- * Software distributed under the NPL is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
- * for the specific language governing rights and limitations under the
- * NPL.
- *
- * The Initial Developer of this code under the NPL is Netscape
- * Communications Corporation. Portions created by Netscape are
- * Copyright (C) 1998 Netscape Communications Corporation. All Rights
- * Reserved.
- */
- /* kinsukof.c */
-
- #include "intlpriv.h"
-
- /* The table is defined in kinsukod.c */
- extern const char *ProhibitBegin_SJIS[];
- extern const char *ProhibitBegin_EUCJP[];
- extern const char *ProhibitBegin_BIG5[];
- extern const char *ProhibitBegin_GB[];
- extern const char *ProhibitBegin_KSC[];
- extern const char *ProhibitBegin_UTF8[];
- extern const char *ProhibitBegin_CNS[];
-
- extern const char *ProhibitEnd_SJIS[];
- extern const char *ProhibitEnd_EUCJP[];
- extern const char *ProhibitEnd_BIG5[];
- extern const char *ProhibitEnd_GB[];
- extern const char *ProhibitEnd_KSC[];
- extern const char *ProhibitEnd_UTF8[];
- extern const char *ProhibitEnd_CNS[];
-
- PUBLIC const char *INTL_NonBreakingSpace(uint16 win_csid)
- {
-
- #ifdef XP_MAC
- return "\07"; /* 0x07 */
- #else
- return "\240"; /* 0xA0 */
- #endif
-
- }
- /*
- INTL_CharClass is used for multibyte to divide character to different type
- */
- #define IN_BETWEEN(a,b,c) (((a) <= (b)) && ((b) <= (c)))
- PUBLIC int
- INTL_CharClass(int charset, unsigned char *pstr)
- {
- int c1, c2, c3;
-
- c1 = *pstr;
-
- switch (charset)
- {
- case CS_SJIS:
- /*
- SEVEN_BIT_CHAR: [0x00-0x7F]
- HALFWIDTH_PRONOUNCE_CHAR: [0xA0-0xE0]
- FULLWIDTH_ASCII_CHAR: [0x82] [0x60-0x9A]
- [0x83] [0x9f-0xB6] ( Really no ASCII but Greek and Cyrillic )
- [0x83] [0xBF-0x8F]
- [0x84] [0x40-0x60]
- [0x84] [0x70-0x8F]
- FULLWIDTH_PRONOUNCE_CHAR: [0x82] [0x9F-0xF1]
- [0x83] [0x40-0x96]
- [0x81] [0x5B-0x5D]
- KANJI_CHAR: [0x88-0xFC] [xxxxxxxxx] (Except above)
-
- Note: We count Cyrillic and Greek as FULLWIDTH_ASCII_CHAR
-
- */
- if (c1 < 0x80)
- return SEVEN_BIT_CHAR;
-
- if (IN_BETWEEN(0xA0, c1, 0xE0))
- return HALFWIDTH_PRONOUNCE_CHAR;
-
- c2 = *(pstr + 1);
-
- switch(c1)
- {
- case 0x81:
- if(IN_BETWEEN(0x5B, c2, 0x5D))
- return FULLWIDTH_PRONOUNCE_CHAR;
- break;
-
- case 0x82:
- if(IN_BETWEEN(0x60, c2, 0x9A))
- return FULLWIDTH_ASCII_CHAR;
-
- if(IN_BETWEEN(0x9F, c2, 0xF1))
- return FULLWIDTH_PRONOUNCE_CHAR;
- break;
-
- case 0x83:
- if(IN_BETWEEN(0x9F, c2, 0xB6) || IN_BETWEEN(0xBF, c2, 0xD0))
- return FULLWIDTH_ASCII_CHAR;
-
- if(IN_BETWEEN(0x40, c2, 0x96))
- return FULLWIDTH_PRONOUNCE_CHAR;
- break;
-
- case 0x84:
- if(IN_BETWEEN(0x40, c2, 0x8F) || IN_BETWEEN(0xBF, c2, 0xD0))
- return FULLWIDTH_ASCII_CHAR;
- break;
- }
-
-
- if (IN_BETWEEN(0x88, c1, 0xFC))
- return KANJI_CHAR;
-
- return UNCLASSIFIED_CHAR;
-
- case CS_EUCJP: /* TO BE TEST ON UNIX */
- /*
- SEVEN_BIT_CHAR: [0x00-0x7F]
- HALFWIDTH_PRONOUNCE_CHAR: [0x8E]
- FULLWIDTH_ASCII_CHAR: [0xA3] [0xC1-0xDA]
- [0xE1-0xFA]
- [0xA6] [0xA1-0xB8]
- [0xC1-0xD8]
- [0xA7] [0xA1-0xC1]
- [0xD1-0xF1]
- [0x8F] [0xA6-0xAF]
- FULLWIDTH_PRONOUNCE_CHAR: [0xA4] [xxxxxxx]
- [0xA5] [xxxxxxx]
- [0x81] [0x5B-0x5D]
- KANJI_CHAR: [0xB0-0xFF] [xxxx]
- [0x8F] [>0xB0]
-
- Note: We count Cyrillic and Greek as FULLWIDTH_ASCII_CHAR
-
- */
- if (c1 < 0x80)
- return SEVEN_BIT_CHAR;
-
- c2 = *(pstr + 1);
-
- switch(c1)
- {
- case 0x8E:
- return HALFWIDTH_PRONOUNCE_CHAR;
-
- case 0x8F:
- if(IN_BETWEEN(0xA6, c2, 0xAF))
- return FULLWIDTH_ASCII_CHAR;
- break;
-
- case 0xA3:
- if(IN_BETWEEN(0xC1, c2, 0xDA) || IN_BETWEEN(0xE1, c2, 0xFA))
- return FULLWIDTH_ASCII_CHAR;
- break;
-
- case 0xA4: case 0xA5:
- return FULLWIDTH_PRONOUNCE_CHAR;
-
- case 0xA6:
- if(IN_BETWEEN(0xA1, c2, 0xB8) || IN_BETWEEN(0xC1, c2, 0xD8))
- return FULLWIDTH_ASCII_CHAR;
- break;
-
- case 0xA7:
- if(IN_BETWEEN(0xA1, c2, 0xC1) || IN_BETWEEN(0xD1, c2, 0xF1))
- return FULLWIDTH_ASCII_CHAR;
- break;
- }
-
-
- if(
- (c1 >= 0xB0) ||
- ((c1 == 0x8F) && (c2 > 0xB0))
- )
- {
- return KANJI_CHAR;
- }
-
- return UNCLASSIFIED_CHAR;
-
- case CS_KSC_8BIT:
- /*
- SEVEN_BIT_CHAR: [0x00-0x80]
- HALFWIDTH_PRONOUNCE_CHAR: None
- FULLWIDTH_ASCII_CHAR: [0xA3] [0xC1-0xDA]
- [0xE1-0xFA]
- [0xA5] [0xC1-0xD8]
- [0xE1-0xF8]
- [0xAC] [0xA1-0xC2]
- [0xD1-0xF2]
- FULLWIDTH_PRONOUNCE_CHAR: [0xA4] [0xA1-0xFE]
- [0xB0-0xC8] [xxxxxxxxx]
- KANJI_CHAR: [0xCA-0xFD] [xxxxxxxxx]
-
- Note: We didn't handle Hiragana and Katakana here
- We count Cyrillic and Greek as FULLWIDTH_ASCII_CHAR
-
- */
- if (c1 < 0x80)
- return SEVEN_BIT_CHAR;
-
- c2 = *(pstr + 1);
- if (
- ((c1== 0xA3) && (IN_BETWEEN(0xC1, c2, 0xDA) || IN_BETWEEN(0xE1, c2, 0xFA))) ||
- ((c1== 0xA5) && (IN_BETWEEN(0xC1, c2, 0xD8) || IN_BETWEEN(0xE1, c2, 0xF8))) ||
- ((c1== 0xAC) && (IN_BETWEEN(0xA1, c2, 0xC2) || IN_BETWEEN(0xD1, c2, 0xF2)))
- )
- {
- return FULLWIDTH_ASCII_CHAR;
- }
-
- if (
- ((c1== 0xA4) && (IN_BETWEEN(0xA1, c2, 0xFE))) ||
- (IN_BETWEEN(0xB0, c1, 0xC8))
- )
- {
- return FULLWIDTH_PRONOUNCE_CHAR;
- }
-
- if (IN_BETWEEN(0xCA, c1, 0xFD))
- return KANJI_CHAR;
-
- return UNCLASSIFIED_CHAR;
-
- case CS_GB_8BIT:
- /*
- SEVEN_BIT_CHAR: [0x00-0x7F]
- HALFWIDTH_PRONOUNCE_CHAR:
- FULLWIDTH_ASCII_CHAR: [0xA3] [0xC1-0xDA]
- [0xE1-0xFA]
- [0xA6] [0xA1-0xB8] Greek
- [0xC1-0xD8]
- [0xA7] [0xA1-0xC1] Cyrillic
- [0xD1-0xF1]
- [0xA8] [0xA1-0xBA] European
- FULLWIDTH_PRONOUNCE_CHAR: [0xA4,0xA5,0xA8] [xxxx]
- KANJI_CHAR:
- */
- if (c1 < 0x80)
- return SEVEN_BIT_CHAR;
-
- c2 = *(pstr + 1);
- if (
- ((c1== 0xA3) && (IN_BETWEEN(0xC1, c2, 0xDA) || IN_BETWEEN(0xE1, c2, 0xFA))) ||
- ((c1== 0xA6) && (IN_BETWEEN(0xA1, c2, 0xB8) || IN_BETWEEN(0xC1, c2, 0xD8))) ||
- ((c1== 0xA7) && (IN_BETWEEN(0xA1, c2, 0xC1) || IN_BETWEEN(0xD1, c2, 0xF1))) ||
- ((c1== 0xA8) && (IN_BETWEEN(0xA1, c2, 0xBA)) )
- )
- {
- return FULLWIDTH_ASCII_CHAR;
- }
-
- if ((c1 == 0xA4) || (c1 == 0xA5) || (c1 == 0xA8))
- return FULLWIDTH_PRONOUNCE_CHAR;
-
- if (IN_BETWEEN(0xB0, c1, 0xF7))
- return KANJI_CHAR;
-
- return UNCLASSIFIED_CHAR;
-
- case CS_BIG5:
- /*
- SEVEN_BIT_CHAR: [0x00-0x7F]
- HALFWIDTH_PRONOUNCE_CHAR:
- FULLWIDTH_ASCII_CHAR: [0xA2] [0xCF-0xFF]
- [0xA3] [0x40-0x73]
- FULLWIDTH_PRONOUNCE_CHAR: [0xA3] [0x74-0x7E]
- [0xA1-0xBF]
- KANJI_CHAR: [0xA4-0xFF] [xxxxxxxxx]
- */
- if (c1 < 0x80)
- return SEVEN_BIT_CHAR;
-
- c2 = *(pstr + 1);
-
- switch(c1)
- {
- case 0xA2:
- if (IN_BETWEEN(0xCF, c2, 0xFF))
- return FULLWIDTH_ASCII_CHAR;
- break;
-
- case 0xA3:
- if (IN_BETWEEN(0x74, c2, 0x7E) || IN_BETWEEN(0xA1, c2, 0xBF))
- return FULLWIDTH_PRONOUNCE_CHAR;
-
- if (IN_BETWEEN(0x40, c2, 0x73))
- return FULLWIDTH_ASCII_CHAR;
-
- break;
- }
-
- if (c1 >= 0xA4)
- return KANJI_CHAR;
-
- return UNCLASSIFIED_CHAR;
-
- case CS_CNS_8BIT: /* TO BE TEST ON UNIX */
- /*
- SEVEN_BIT_CHAR: [0x00-0x7F]
- HALFWIDTH_PRONOUNCE_CHAR:
- FULLWIDTH_ASCII_CHAR: [0xA4] [0xC1-0xFE]
- [0xA5] [0xA1-0xC6]
- FULLWIDTH_PRONOUNCE_CHAR: [0xA5] [0xC7-0xF0]
- KANJI_CHAR: [0xC4-0xFF] [xxxxxxxxx]
- [0x8E]
- */
- if (c1 < 0x80)
- return SEVEN_BIT_CHAR;
-
- c2 = *(pstr + 1);
-
- switch(c1)
- {
- case 0xA4:
- if(IN_BETWEEN(0xC1, c2, 0xFE))
- return FULLWIDTH_ASCII_CHAR;
- break;
-
- case 0xA5:
- if(IN_BETWEEN(0xC7, c2, 0xF0))
- return FULLWIDTH_PRONOUNCE_CHAR;
-
- if(IN_BETWEEN(0xA1, c2, 0xC6))
- return FULLWIDTH_ASCII_CHAR;
- break;
- }
-
- if (IN_BETWEEN(0xC4, c1, 0x8E))
- return KANJI_CHAR;
-
- return UNCLASSIFIED_CHAR;
-
- case CS_UTF8:
- /*
- SEVEN_BIT_CHAR:
-
- FULLWIDTH_ASCII_CHAR:
- U+0000 - U+10FF
- [C0-E0] [xxxx] Done
- [E1] [80-83] [xxxx] Done
- U+1E00 - U+1FFF
- [E1] [B8-BF] Done
- U+FF21 - U+FF3A
- [EF] [BC] [A1-BA] Done
- U+FF41 - U+FF5A
- [EF] [BD] [81-9A] Done
-
- FULLWIDTH_PRONOUNCE_CHAR:
- U+1100 - U+11FF
- [E1] [84-87] Done
- U+3040 - U+318F
- [E3] [81-85] [xx] Done
- [E3] [86] [80-8F] Done
- U+FF66 - U+FFDC
- [EF] [BD] [AC-]
- [EF] [BE]
- [EF] [BF] [-9C]
- U+AC00 - U+D7FF
- [EA] [B0-] Done
- [EB-EC] [xxx] Done
- [ED] [-9F] Done
-
- KANJI_CHAR:
- U+4E00 - U+9FFF
- [E4] [B8-] Done
- [E5-E9] [xx] Done
- */
- if (c1 < 0x80)
- return SEVEN_BIT_CHAR;
-
- if (IN_BETWEEN(0xC0, c1, 0xE0))
- {
- return FULLWIDTH_ASCII_CHAR;
- }
-
- c2 = *(pstr + 1);
-
- switch(c1)
- {
- case 0xE1:
- if (IN_BETWEEN(0x80, c2, 0x83) || IN_BETWEEN(0xB8, c2, 0xBF))
- return FULLWIDTH_ASCII_CHAR;
- if (IN_BETWEEN(0x84, c2, 0x87))
- return FULLWIDTH_PRONOUNCE_CHAR;
-
- break;
-
- case 0xE3:
- if (IN_BETWEEN(0x81, c2, 0x85))
- return FULLWIDTH_PRONOUNCE_CHAR;
-
- if (c2 == 0x86)
- {
- c3 = *(pstr + 2);
- if (IN_BETWEEN(0x80, c3, 0x8F))
- return FULLWIDTH_PRONOUNCE_CHAR;
- }
-
- break;
-
- case 0xE4:
- if (c2 >= 0xB8)
- return KANJI_CHAR;
- break;
-
- case 0xE5: case 0xE6: case 0xE7: case 0xE8: case 0xE9:
- return KANJI_CHAR;
- break;
-
- case 0xEA:
- if (c2 >= 0xB0)
- return FULLWIDTH_PRONOUNCE_CHAR;
- break;
-
- case 0xEB: case 0xEC:
- return FULLWIDTH_PRONOUNCE_CHAR;
- break;
-
- case 0xED:
- if (c2 <= 0x9F)
- return FULLWIDTH_PRONOUNCE_CHAR;
- break;
-
- case 0xEF:
- c3 = *(pstr + 2);
- switch(c2)
- {
- case 0xBC:
- if (IN_BETWEEN(0xA1, c3, 0xBA))
- return FULLWIDTH_ASCII_CHAR;
- break;
-
- case 0xBD:
- if (IN_BETWEEN(0x81, c3, 0x9A))
- return FULLWIDTH_ASCII_CHAR;
- if (c3 >= 0xAC)
- return FULLWIDTH_PRONOUNCE_CHAR;
- break;
-
- case 0xBE:
- return FULLWIDTH_PRONOUNCE_CHAR;
- break;
-
- case 0xBF:
- if (c3 <= 0x9C)
- return FULLWIDTH_PRONOUNCE_CHAR;
- break;
- }
- break;
- }
-
- return UNCLASSIFIED_CHAR;
- default:
- break;
- }
-
- return UNCLASSIFIED_CHAR;
- }
-
/*
 * IF_A_IN_ARRAY_B_THEN_RETURN_C(a, b, c)
 *
 * Walks the string table b, whose end is marked by an empty string, and
 * makes the ENCLOSING FUNCTION return c as soon as one table entry is a
 * prefix of the text at a (compared with XP_STRNCMP over the entry's
 * XP_STRLEN).  If no entry matches, execution continues after the macro.
 *
 * Written as do { } while (0) so that an invocation followed by ';' is a
 * single statement and can be used as the body of an if/else.  The index
 * variable has a deliberately unusual name so that it cannot capture an
 * identifier appearing in the macro arguments.
 */
#define IF_A_IN_ARRAY_B_THEN_RETURN_C(a,b,c) \
    do { \
        int kinsoku_idx_; \
        for (kinsoku_idx_ = 0; (b)[kinsoku_idx_][0]; kinsoku_idx_++) { \
            if (XP_STRNCMP((char *)(a), (b)[kinsoku_idx_], \
                           XP_STRLEN((b)[kinsoku_idx_])) == 0) \
                return (c); \
        } \
    } while (0)

/*
 * IF_PROHIBIT_CLASS_THEN_RETURN(a, ba, ea)
 *
 * Makes the enclosing function return PROHIBIT_BEGIN_OF_LINE when the text
 * at a starts with an entry of table ba, otherwise PROHIBIT_END_OF_LINE
 * when it starts with an entry of table ea.  Falls through when neither
 * table matches.  The begin table is always consulted first.
 */
#define IF_PROHIBIT_CLASS_THEN_RETURN(a,ba,ea) \
    do { \
        IF_A_IN_ARRAY_B_THEN_RETURN_C(a,ba,PROHIBIT_BEGIN_OF_LINE); \
        IF_A_IN_ARRAY_B_THEN_RETURN_C(a,ea,PROHIBIT_END_OF_LINE); \
    } while (0)
-
- PUBLIC int INTL_KinsokuClass(int16 win_csid, unsigned char *pstr)
- {
- switch (win_csid)
- {
- case CS_SJIS:
- IF_PROHIBIT_CLASS_THEN_RETURN(pstr,ProhibitBegin_SJIS,ProhibitEnd_SJIS);
- break;
- case CS_EUCJP:
- IF_PROHIBIT_CLASS_THEN_RETURN(pstr,ProhibitBegin_EUCJP,ProhibitEnd_EUCJP);
- break;
- case CS_GB_8BIT:
- IF_PROHIBIT_CLASS_THEN_RETURN(pstr,ProhibitBegin_GB,ProhibitEnd_GB);
- break;
- case CS_BIG5:
- IF_PROHIBIT_CLASS_THEN_RETURN(pstr,ProhibitBegin_BIG5,ProhibitEnd_BIG5);
- break;
- case CS_CNS_8BIT:
- IF_PROHIBIT_CLASS_THEN_RETURN(pstr,ProhibitBegin_CNS,ProhibitEnd_CNS);
- break;
- case CS_KSC_8BIT:
- IF_PROHIBIT_CLASS_THEN_RETURN(pstr,ProhibitBegin_KSC,ProhibitEnd_KSC);
- break;
- case CS_UTF8:
- IF_PROHIBIT_CLASS_THEN_RETURN(pstr,ProhibitBegin_UTF8,ProhibitEnd_UTF8);
- if( *pstr <= 0xE2) /* UCS2 < 0x2000 */
- return PROHIBIT_WORD_BREAK;
- break;
- }
-
- return PROHIBIT_NOWHERE;
- }
-
-
-
-