Tools

home *** CD-ROM | disk | FTP | other *** search

/ Tools / WinSN5.0Ver.iso / NETSCAP.50 / WIN1998.ZIP / ns / include / libi18n.h < prev next >

Wrap

C/C++ Source or Header | 1998-04-08 | 91.3 KB | 2,652 lines

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL. You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All Rights
 * Reserved.
 */

/* libi18n.h */

#ifndef INTL_LIBI18N_H
#define INTL_LIBI18N_H

#include "xp.h"

#ifndef iDocumentContext
#define iDocumentContext MWContext *
#endif

#define Stream NET_StreamClass
#define URL URL_Struct

#include "csid.h"

/* UNICVTAPI expands to a DLL export attribute only when building the
 * Unicode converter DLL (_UNICVT_DLL_) on XP_WIN32; otherwise it is empty. */
#ifdef _UNICVT_DLL_
#ifdef XP_WIN32
#define UNICVTAPI __declspec(dllexport)
#else
#define UNICVTAPI
#endif
#else /* _UNICVT_DLL_ is undefined */
#define UNICVTAPI
#endif

/* Enum for INTL_CSIDIteratorCreate */
enum {
    csiditerate_TryIMAP4Search = 1
};

/*
 * To be called when backend catches charset info on <meta ... charset=...> tag.
 * This will force netlib to go get fresh data again either through cache or
 * network.
 *
 * NOTE(review): the text above reads like the description of a function
 * rather than of this enum; presumably these constants are the meta-charset
 * relayout states -- confirm against the code that uses them.
 */
enum {
    METACHARSET_NONE = 0,
    METACHARSET_HASCHARSET,
    METACHARSET_REQUESTRELAYOUT,
    METACHARSET_FORCERELAYOUT,
    METACHARSET_RELAYOUTDONE
};

XP_BEGIN_PROTOS

/*=======================================================*/
/* Character Code Conversion (CCC).
 *
 *
 * CCCDataObject accessor functions are
 * built as a table to allow access from a DLL
 *
 * Note: new functions must be added at the end
 * or old apps using the new dll will fail
 */
/**@name Character Code Conversion (CCC) */
/*@{*/
/**
 * Function Prototype for the codeset conversion function.
 *
 * @param obj     Specifies the converter object
 * @param src     Specifies the text to be converted
 * @param srclen  Specifies the length of src
 * @return the converted text. The length of the converted result can be
 *         accessed via INTL_GetCCCLen(obj)
 * @see INTL_GetCCCLen
 * @see INTL_SetCCCCvtfunc
 *
 */
typedef unsigned char *(*CCCFunc)(CCCDataObject obj,
                                  const unsigned char * src,
                                  int32 srclen);

/**
 * Function Prototype for the Report Auto Detect Result function.
 *
 * @param closure   Specifies the closure which is associated with the
 *                  converter object by calling INTL_SetCCCReportAutoDetect
 * @param obj       Specifies the converter object
 * @param doc_csid  Specifies the auto-detected document csid
 * @see INTL_SetCCCReportAutoDetect
 *
 */
typedef void (*CCCRADFunc)(void * closure,
                           CCCDataObject obj,
                           uint16 doc_csid);

/**
 * Opaque converter object.
 *
 * This struct is an opaque converter object. Only the function table
 * pointer is visible here; the accessor macros in this header reach the
 * private fields through that table.
 */
struct OpaqueCCCDataObject {
    /* WARNING: MUST MATCH REAL STRUCT */
    /** pointer to the converter object private functions struct */
    struct INTL_CCCFuncs *funcs_pointer;
};

/**
 * This structure holds the private functions of a conversion object.
 *
 * <B>WARNING: THIS STRUCT AND THE TABLE MUST BE IN SYNC WITH EACH OTHER </B>
 *
 * New entries must be appended at the end (see the note at the top of this
 * section) so that the member order seen by existing callers is unchanged.
 */
struct INTL_CCCFuncs {
    /** The private function of INTL_SetCCCReportAutoDetect. */
    void (*set_report_autodetect)(CCCDataObject, CCCRADFunc, void*);
    /** The private function of INTL_CallCCCReportAutoDetect. */
    void (*call_report_autodetect)(CCCDataObject, uint16);
    /** The private function of INTL_SetCCCCvtfunc. */
    void (*set_cvtfunc)(CCCDataObject, CCCFunc);
    /** The private function of INTL_GetCCCCvtfunc. */
    CCCFunc (*get_cvtfunc)(CCCDataObject);
    /** The private function of INTL_SetCCCJismode. */
    void (*set_jismode)(CCCDataObject,int32);
    /** The private function of INTL_GetCCCJismode. */
    int32 (*get_jismode)(CCCDataObject);
    /** The private function of INTL_SetCCCCvtflag. */
    void (*set_cvtflag)(CCCDataObject,int32);
    /** The private function of INTL_GetCCCCvtflag. */
    int32 (*get_cvtflag)(CCCDataObject);
    /** The private function of INTL_GetCCCUncvtbuf. */
    unsigned char* (*get_uncvtbuf)(CCCDataObject);
    /** The private function of INTL_SetCCCDefaultCSID. */
    void (*set_default_doc_csid)(CCCDataObject, uint16);
    /** The private function of INTL_GetCCCDefaultCSID. */
    uint16 (*get_default_doc_csid)(CCCDataObject);
    /** The private function of INTL_SetCCCFromCSID. */
    void (*set_from_csid)(CCCDataObject, uint16);
    /** The private function of INTL_GetCCCFromCSID. */
    uint16 (*get_from_csid)(CCCDataObject);
    /** The private function of INTL_SetCCCToCSID. */
    void (*set_to_csid)(CCCDataObject, uint16);
    /** The private function of INTL_GetCCCToCSID. */
    uint16 (*get_to_csid)(CCCDataObject);
    /** The private function of INTL_SetCCCRetval. */
    void (*set_retval)(CCCDataObject, int);
    /** The private function of INTL_GetCCCRetval. */
    int (*get_retval)(CCCDataObject);
    /** The private function of INTL_SetCCCLen. */
    void (*set_len)(CCCDataObject, int32);
    /** The private function of INTL_GetCCCLen. */
    int32 (*get_len)(CCCDataObject);
};

/**
 * Create and initialize Character Code Converter Object.
 *
 * Create and initialize a character code converter.
 * It also sets up a converter if a doc_csid is known (by DOC_CSID_KNOWN).
 * The caller is responsible for deallocating the allocated memory.
 *
 * @param c                 Pointer to an i18n private data structure.
 * @param default_doc_csid  Default doc_csid to be used.
 * @return CCCDataObject    Created character code converter object pointer.
 */
PUBLIC CCCDataObject INTL_CreateDocumentCCC(
    INTL_CharSetInfo c,
    uint16 default_doc_csid
);

/**
 * Look for a converter from one charset to another.
 *
 * If the from_csid is CS_DEFAULT, this function uses the ID returned by
 * INTL_GetCCCDefaultCSID. If the to_csid is zero, this function uses the ID
 * returned by INTL_DocToWinCharSetID for the from_csid determined above.
 * If found, the converter function is stored in the given character code
 * conversion object.
*
 * @param from_csid  Specifies the charset ID to convert from
 * @param to_csid    Specifies the charset ID to convert to
 * @param obj        Specifies the character code converter object
 * @return 1 for success, 0 for failure
 * @see INTL_CreateCharCodeConverter, INTL_CallCharCodeConverter
 */
PUBLIC int INTL_GetCharCodeConverter(
    int16 from_csid,
    int16 to_csid,
    CCCDataObject obj
);

/**
 * Set up charset internal data by meta charset.
 *
 * Given a charset name, this will set up the i18n private charset info
 * which is obtained from the given context.
 * The input charset name should be obtained from an HTML META tag.
 *
 * @param context      Context to be set up.
 * @param charset_tag  Charset name as an input (e.g. iso-8859-1).
 * @see INTL_CSIReportMetaCharsetTag
 */
PUBLIC void INTL_CCCReportMetaCharsetTag(
    MWContext *context,
    char *charset_tag
);

/**
 * Passes some more text to the character code converter.
 *
 * The character code converter object keeps track of the current state as it
 * receives data to convert. If partial characters are received, they are
 * buffered until this function is called again.
 * INTL_GetCharCodeConverter must be called before calling this function.
 *
 * In some cases, the text is converted in place (in the input buffer).
 *
 * @param obj  Specifies the character code converter object
 * @param str  Specifies the text to be converted
 * @param len  Specifies the length in bytes of the text
 * @return The converted text, null terminated
 * @see INTL_GetCharCodeConverter
 */
PUBLIC unsigned char *INTL_CallCharCodeConverter(
    CCCDataObject obj,
    const unsigned char *str,
    int32 len
);

/**
 * Initialize and set up a character code converter for a mail charset.
 *
 * Allocates memory for and initializes a character code converter.
 * The from/to charsets are determined by the given context, or by parsing
 * the source buffer in the case of HTML.
 * After the charsets are determined, it sets up a converter function.
 * The caller is responsible for deallocating the allocated memory.
 *
 * @param context      Context to access charset info.
 * @param isHTML       If TRUE then the input stream is parsed for meta tag.
 * @param buffer       Source buffer.
 * @param buffer_size  The length of the source buffer.
 * @return CCCDataObject Created character code converter object pointer.
 * @see INTL_CreateCharCodeConverter
 */
PUBLIC CCCDataObject INTL_CreateDocToMailConverter(
    iDocumentContext context,
    XP_Bool isHTML,
    unsigned char *buffer,
    uint32 buffer_size
);

/**
 * Create a character code converter object used for codeset conversion.
 *
 * @return The new character code converter object
 * @see INTL_CreateDocumentCCC, INTL_GetCharCodeConverter,
 *      INTL_DestroyCharCodeConverter
 * @deprecated Obsolescent. Please use INTL_CreateDocumentCCC.
 */
PUBLIC CCCDataObject INTL_CreateCharCodeConverter(void);

/**
 * Frees the given character code conversion object.
 *
 * This function destroys the code conversion object created by
 * INTL_CreateCharCodeConverter.
 *
 * @param obj  Specifies the character code conversion object to free
 * @see INTL_CreateCharCodeConverter
 */
PUBLIC void INTL_DestroyCharCodeConverter(
    CCCDataObject obj
);

/**
 * Converts a piece of text from one charset to another.
 *
 * This function does not do charset ID auto-detection. The caller must pass
 * the from/to charset IDs. This function does not keep state. Don't use it to
 * convert a stream of data. Only use this when you want to convert a string,
 * and you have no way to hold on to the converter object.
 *
 * If the string gets converted in place (use the input buffer), then this
 * function returns NULL.
*
 * @param fromcsid    Specifies the charset ID to convert from
 * @param tocsid      Specifies the charset ID to convert to
 * @param pSrc        Specifies the input text
 * @param block_size  Specifies the number of bytes in the input text
 * @return The converted text, null terminated, or NULL if converted in place
 * @see INTL_CallCharCodeConverter
 */
PUBLIC unsigned char *INTL_ConvertLineWithoutAutoDetect(
    int16 fromcsid,
    int16 tocsid,
    unsigned char *pSrc,
    uint32 block_size
);

/**
 * Returns the window charset ID corresponding to the given document charset ID.
 *
 * This function searches a built-in table to find the first entry that
 * matches the given document charset ID. If no such entry is found, it
 * returns CS_FE_ASCII.
 *
 * @param csid  Specifies the document charset ID
 * @return The corresponding window charset ID
 */
PUBLIC int16 INTL_DocToWinCharSetID(
    int16 csid
);

/**
 * Return the charset used in internet mail messages for a specified charset.
 *
 * In the current implementation of Communicator, we assume there is a
 * many-to-one relationship between an encoding and the encoding used in
 * internet mail messages. This routine is used to get the outgoing encoding
 * for a specified encoding. The caller can then convert the text from the
 * specified encoding to the returned encoding before sending out the
 * internet message. Usually the mapping used for newsgroup postings is the
 * same as this one. However, for some regions/countries, such as Korea, it is
 * not the same: different encodings are used in internet mail messages and
 * in newsgroup postings. In that case INTL_DefaultNewsCharSetID should be
 * used for newsgroup postings instead.
 *
 * Issues: The current model assumes that text in a particular encoding is
 * always sent out in one encoding. That assumption breaks when people want
 * to send out messages in different Cyrillic, Chinese, or Unicode encodings.
 * Therefore, we may change this architecture in the near future.
 *
 * The mappings are:
 * <UL>
 * <LI>CS_ASCII: CS_ASCII
 * <LI>CS_LATIN1: CS_LATIN1
 * <LI>CS_JIS: CS_JIS
 * <LI>CS_SJIS: CS_JIS
 * <LI>CS_EUCJP: CS_JIS
 * <LI>CS_JIS_AUTO: CS_JIS
 * <LI>CS_SJIS_AUTO: CS_JIS
 * <LI>CS_EUCJP_AUTO: CS_JIS
 * <LI>CS_KSC_8BIT: CS_2022_KR [Note 1]
 * <LI>CS_KSC_8BIT_AUTO: CS_2022_KR [Note 1]
 * <LI>CS_GB_8BIT: CS_GB_8BIT
 * <LI>CS_BIG5: CS_BIG5
 * <LI>CS_CNS_8BIT: CS_BIG5
 * <LI>CS_MAC_ROMAN: CS_LATIN1
 * <LI>CS_LATIN2: CS_LATIN2
 * <LI>CS_MAC_CE: CS_LATIN2
 * <LI>CS_CP_1250: CS_LATIN2
 * <LI>CS_8859_5: CS_KOI8_R [Note 2]
 * <LI>CS_KOI8_R: CS_KOI8_R [Note 2]
 * <LI>CS_MAC_CYRILLIC: CS_KOI8_R [Note 2]
 * <LI>CS_CP_1251: CS_KOI8_R [Note 2]
 * <LI>CS_8859_7: CS_8859_7
 * <LI>CS_CP_1253: CS_8859_7
 * <LI>CS_MAC_GREEK: CS_8859_7
 * <LI>CS_8859_9: CS_8859_9
 * <LI>CS_MAC_TURKISH: CS_8859_9
 * <LI>CS_UTF8: CS_UTF7
 * <LI>CS_UTF7: CS_UTF7
 * <LI>CS_UCS2: CS_UTF7
 * <LI>CS_UCS2_SWAP: CS_UTF7
 * </UL>
 * Note:
 * <OL>
 * <LI>For INTL_DefaultNewsCharSetID, this value is different
 * <LI>The value is the one specified in preference
 * "intl.mailcharset.cyrillic". The default value is CS_KOI8_R. See
 * <A HREF=http://people.netscape.com/ftang/cyrillicmail.html>
 * http://people.netscape.com/ftang/cyrillicmail.html</A> for details.
 * </OL>
 *
 * @param csid  Specifies the encoding
 * @return the encoding that should be used for the outgoing internet mail
 *         message.
 * @see INTL_DefaultNewsCharSetID
 */
PUBLIC int16 INTL_DefaultMailCharSetID(int16 csid);

/**
 * Return the charset used in internet newsgroup postings for a specified
 * charset.
 *
 * In the current implementation of Communicator, we assume there is a
 * many-to-one relationship between an encoding and the encoding used in
 * internet newsgroup postings. This routine is used to get the outgoing
 * encoding for a specified encoding. The caller can then convert the text
 * from the specified encoding to the returned encoding before posting the
 * message to the newsgroup. Usually the mapping used for internet mail
 * messages is the same as this one. However, for some regions/countries,
 * such as Korea, it is not the same: different encodings are used in
 * internet mail messages and in newsgroup postings. In that case
 * INTL_DefaultMailCharSetID should be used for mail messages instead.
 *
 * Issues: The current model assumes that text in a particular encoding is
 * always sent out in one encoding. That assumption breaks when people want
 * to send out messages in different Cyrillic, Chinese, or Unicode encodings.
 * Therefore, we may change this architecture in the near future.
 *
 * The mappings are:
 * <UL>
 * <LI>ASCII: CS_ASCII
 * <LI>LATIN1: CS_LATIN1
 * <LI>JIS: CS_JIS
 * <LI>SJIS: CS_JIS
 * <LI>EUCJP: CS_JIS
 * <LI>JIS_AUTO: CS_JIS
 * <LI>SJIS_AUTO: CS_JIS
 * <LI>EUCJP_AUTO: CS_JIS
 * <LI>KSC_8BIT: CS_KSC_8BIT [Note 1]
 * <LI>KSC_8BIT_AUTO: CS_KSC_8BIT [Note 1]
 * <LI>GB_8BIT: CS_GB_8BIT
 * <LI>BIG5: CS_BIG5
 * <LI>CNS_8BIT: CS_BIG5
 * <LI>MAC_ROMAN: CS_LATIN1
 * <LI>LATIN2: CS_LATIN2
 * <LI>MAC_CE: CS_LATIN2
 * <LI>CP_1250: CS_LATIN2
 * <LI>8859_5: CS_KOI8_R [Note 2]
 * <LI>KOI8_R: CS_KOI8_R [Note 2]
 * <LI>MAC_CYRILLIC: CS_KOI8_R [Note 2]
 * <LI>CP_1251: CS_KOI8_R [Note 2]
 * <LI>8859_7: CS_8859_7
 * <LI>CP_1253: CS_8859_7
 * <LI>MAC_GREEK: CS_8859_7
 * <LI>8859_9: CS_8859_9
 * <LI>MAC_TURKISH: CS_8859_9
 * <LI>UTF8: CS_UTF7
 * <LI>UTF7: CS_UTF7
 * <LI>UCS2: CS_UTF7
 * <LI>UCS2_SWAP: CS_UTF7
 * </UL>
 * Note:
 * <OL>
 * <LI>For INTL_DefaultMailCharSetID, this value is different
 * <LI>The value is the one specified in preference
 * "intl.mailcharset.cyrillic". The default value is CS_KOI8_R. See
 * <A HREF=http://people.netscape.com/ftang/cyrillicmail.html>
 * http://people.netscape.com/ftang/cyrillicmail.html</A> for details.
 * </OL>
 *
 * @param csid  Specifies the encoding
 * @return the encoding that should be used for the outgoing newsgroup
 *         posting.
 * @see INTL_DefaultMailCharSetID
 */
PUBLIC int16 INTL_DefaultNewsCharSetID(int16 csid);

/**
 * Tell libi18n which font charset IDs are available in the front end.
* * The front end (FE) calls this function to inform libi18n of the charset IDs * of the fonts that are currently available. * * This function calls INTL_SetUnicodeCSIDList to set up the Unicode * machinery. * * The front end must allocate space for this array using malloc/calloc. If * this function is called more than once, the array passed in a previous call * is freed by this function. However, the front end is responsible for * freeing the array at exit time. * * @param charsets Specifies a null-terminated array of charset IDs */ PUBLIC void INTL_ReportFontCharSets( int16 *charsets ); /** * Get the "Unconverted Buffer" from the Converter Object. * * @param obj Specifies the converter object * @return the unconverted buffer in the converter object */ #define INTL_GetCCCUncvtbuf(obj) (obj->funcs_pointer->get_uncvtbuf)(obj) /** * Set the "conversion result length" to the converter object. * * @param obj Specifies the converter object * @param len Specifies the length of current conversion result. * @see INTLGetCCCLen */ #define INTL_SetCCCLen(obj,len) ((obj)->funcs_pointer->set_len)((obj), (len)) /** * Get the "conversion result length" from the converter object. * * @param obj Specifies the converter object * @return the length of conversion result stored in the converter object * @see INTL_SetCCCLen */ #define INTL_GetCCCLen(obj) ((obj)->funcs_pointer->get_len)(obj) /** * Set a private flag "Jismode" to the converter object. * * There are no reason any code outside libi18n should call this. * We are considering move this into intlpriv.h. * Don't call this macro unless you are changing libi18n. * * The name "jismode" refers to the ISO 2022 state (JIS mode). * This is what the field was first used for. * It is now used for other purposes as well, so the name is no longer * appropriate. 
*
 * @param obj      Specifies the converter object
 * @param jismode  Specifies the Jismode
 * @see INTL_GetCCCJismode
 */
#define INTL_SetCCCJismode(obj,jismode) \
    ((obj)->funcs_pointer->set_jismode)((obj), (jismode))

/**
 * Get a private flag "Jismode" from the converter object.
 *
 * There is no reason any code outside libi18n should call this.
 * We are considering moving this into intlpriv.h.
 * Don't call this macro unless you are changing libi18n.
 *
 * The name "jismode" refers to the ISO 2022 state (JIS mode).
 * This is what the field was first used for.
 * It is now used for other purposes as well, so the name is no longer
 * appropriate.
 *
 * @param obj  Specifies the converter object
 * @return the Jismode stored in the converter object
 * @see INTL_SetCCCJismode
 */
#define INTL_GetCCCJismode(obj) ((obj)->funcs_pointer->get_jismode)(obj)

/**
 * Set a private flag "Cvtflag" to the converter object.
 *
 * There is no reason any code outside libi18n should call this.
 * We are considering moving this into intlpriv.h.
 * Don't call this macro unless you are changing libi18n.
 *
 * @param obj      Specifies the converter object
 * @param cvtflag  Specifies the Cvtflag
 * @see INTL_GetCCCCvtflag
 */
#define INTL_SetCCCCvtflag(obj,cvtflag) \
    ((obj)->funcs_pointer->set_cvtflag)((obj), (cvtflag))

/**
 * Get a private flag "Cvtflag" from the converter object.
 *
 * There is no reason any code outside libi18n should call this.
 * We are considering moving this into intlpriv.h.
 * Don't call this macro unless you are changing libi18n.
 *
 * @param obj  Specifies the converter object
 * @return the Cvtflag stored in the converter object
 * @see INTL_SetCCCCvtflag
 */
#define INTL_GetCCCCvtflag(obj) ((obj)->funcs_pointer->get_cvtflag)(obj)

/**
 * Set the "Convert To CSID" to the converter object.
 *
 * There is no reason any code outside libi18n should call this.
 * We are considering moving this into intlpriv.h.
 * Don't call this macro unless you are changing libi18n.
 *
 * @param obj      Specifies the converter object
 * @param to_csid  Specifies the Convert To CSID
 * @see INTL_GetCCCToCSID
 */
#define INTL_SetCCCToCSID(obj,to_csid) \
    (((obj)->funcs_pointer->set_to_csid)((obj),(to_csid)))

/**
 * Get the "Convert To CSID" from the converter object.
 *
 * @param obj  Specifies the converter object
 * @return the "Convert To CSID" stored in the converter object
 * @see INTL_SetCCCToCSID
 */
#define INTL_GetCCCToCSID(obj) (((obj)->funcs_pointer->get_to_csid)(obj))

/**
 * Set the "Convert From CSID" to the converter object.
 *
 * There is no reason any code outside libi18n should call this.
 * We are considering moving this into intlpriv.h.
 * Don't call this macro unless you are changing libi18n.
 *
 * @param obj        Specifies the converter object
 * @param from_csid  Specifies the Convert From CSID
 * @see INTL_GetCCCFromCSID
 */
#define INTL_SetCCCFromCSID(obj,from_csid) \
    (((obj)->funcs_pointer->set_from_csid)((obj),(from_csid)))

/**
 * Get the "Convert From CSID" from the converter object.
 *
 * @param obj  Specifies the converter object
 * @return the "Convert From CSID" stored in the converter object
 * @see INTL_SetCCCFromCSID
 */
#define INTL_GetCCCFromCSID(obj) (((obj)->funcs_pointer->get_from_csid)(obj))

/**
 * Set the "Return Value" to the converter object.
 *
 * There is no reason any code outside libi18n should call this.
 * We are considering moving this into intlpriv.h.
 * Don't call this macro unless you are changing libi18n.
 *
 * @param obj     Specifies the converter object
 * @param retval  Specifies the "Return Value"
 * @see INTL_GetCCCRetval
 */
#define INTL_SetCCCRetval(obj,retval) \
    (((obj)->funcs_pointer->set_retval)((obj),(retval)))

/**
 * Get the "Return Value" from the converter object.
 *
 * There is no reason any code outside libi18n should call this.
 * We are considering moving this into intlpriv.h.
 * Don't call this macro unless you are changing libi18n.
 *
 * @param obj  Specifies the converter object
 * @return the "Return Value" stored in the converter object
 * @see INTL_SetCCCRetval
 */
#define INTL_GetCCCRetval(obj) (((obj)->funcs_pointer->get_retval)(obj))

/**
 * Set the "Conversion Function" to the converter object.
 *
 * There is no reason any code outside libi18n should call this.
 * We are considering moving this into intlpriv.h.
 * Don't call this macro unless you are changing libi18n.
 *
 * @param obj   Specifies the converter object
 * @param func  Specifies the "Conversion Function" to be stored in the
 *              converter object
 * @see INTL_GetCCCCvtfunc
 */
#define INTL_SetCCCCvtfunc(obj,func) \
    (((obj)->funcs_pointer->set_cvtfunc)((obj),(func)))

/**
 * Get the "Conversion Function" from the converter object.
 *
 * @param obj  Specifies the converter object
 * @return the "Conversion Function" stored in the converter object
 * @see INTL_SetCCCCvtfunc
 */
#define INTL_GetCCCCvtfunc(obj) ((obj)->funcs_pointer->get_cvtfunc)(obj)

/**
 * Set the "Report Auto Detect Result Function" to the converter object.
 *
 * @param obj      Specifies the converter object
 * @param func     Specifies the "Auto Detect Result Reporting Function"
 * @param closure  Specifies the closure which will be passed to the "Auto
 *                 Detect Result Reporting Function"
 * @see INTL_CallCCCReportAutoDetect
 */
#define INTL_SetCCCReportAutoDetect(obj,func,closure) \
    (((obj)->funcs_pointer->set_report_autodetect)((obj), (func), (closure)))

/**
 * Call the "Report Auto Detect Result Function" associated with the
 * converter object.
 *
 * @param obj       Specifies the converter object
 * @param doc_csid  Specifies the document csid which was auto-detected
 * @see INTL_SetCCCReportAutoDetect
 */
#define INTL_CallCCCReportAutoDetect(obj,doc_csid) \
    (((obj)->funcs_pointer->call_report_autodetect)((obj), (doc_csid)))

/**
 * Set the "Default Document CSID" to the converter object.
 *
 * There is no reason any code outside libi18n should call this.
 * We are considering moving this into intlpriv.h.
 * Don't call this macro unless you are changing libi18n.
 *
 * @param obj               Specifies the converter object
 * @param default_doc_csid  Specifies the Default Document CSID
 * @see INTL_GetCCCDefaultCSID
 */
#define INTL_SetCCCDefaultCSID(obj,default_doc_csid) \
    ((obj)->funcs_pointer->set_default_doc_csid)((obj), (default_doc_csid))

/**
 * Get the "Default Document CSID" from the converter object.
 *
 * There is no reason any code outside libi18n should call this.
 * We are considering moving this into intlpriv.h.
 * Don't call this macro unless you are changing libi18n.
 *
 * @param obj  Specifies the converter object
 * @return the Default Document CSID stored in the converter object
 * @see INTL_SetCCCDefaultCSID
 */
#define INTL_GetCCCDefaultCSID(obj) \
    (((obj)->funcs_pointer->get_default_doc_csid)(obj))
/*@}*/
/*=======================================================*/
/**@name CharSetID and Charset Name Mapping */
/*@{*/
/**
 * Returns the preferred MIME charset name corresponding to the given
 * charset ID.
 *
 * Charset names are registered by IANA (Internet Assigned Numbers Authority).
 * The current charset name database can be found at:
 *
 * <A HREF=ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets>
 * ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets</A>.
 *
 * This function returns the charset name for the given Character Set ID
 * which in most cases corresponds to the "(preferred MIME name)" registered
 * with IANA. This function may return private names not found in the
 * registry. Private names start with "x-". See INTL_CharSetNameToID for
 * information about charset IDs.
 *
 * @param charSetID       Specifies the charset ID
 * @param charset_return  Returns the corresponding charset name, max 128 bytes
 * @see INTL_CharSetNameToID
 */
PUBLIC void INTL_CharSetIDToName(
    int16 charSetID,
    char *charset_return
);

/**
 * Returns the charset ID corresponding to the given charset name.
*
 * The charset ID is a private 16-bit integer, described in
 * ns/include/csid.h. If the given charset is unknown, CS_UNKNOWN is returned.
 * If the given charset is NULL, CS_DEFAULT is returned. Charset names are not
 * case-sensitive. See INTL_CharSetIDToName for a description of charset names.
 *
 * @param charset  Specifies the charset name
 * @return the corresponding charset ID
 * @see INTL_CharSetIDToName
 */
PUBLIC int16 INTL_CharSetNameToID(
    char *charset
);

/**
 * Returns a pointer to the preferred MIME charset name corresponding
 * to the given charset ID.
 *
 * This function is similar to INTL_CharSetIDToName, except that it returns a
 * pointer to the charset name instead of copying it into a caller-supplied
 * buffer. See INTL_CharSetIDToName for other details.
 *
 * NOTE(review): ownership of the returned pointer is not stated here;
 * presumably it refers to library-owned storage -- confirm before modifying
 * or freeing it.
 *
 * @param charSetID  Specifies the charset ID
 * @return The corresponding charset name
 * @see INTL_CharSetIDToName
 */
PUBLIC unsigned char *INTL_CsidToCharsetNamePt(
    int16 charSetID
);

/**
 * Returns the Java charset name corresponding to the given charset ID.
 *
 * The Java charset name is one that JDK 1.1 and up will understand.
 * The Java name is defined in
 * <A HREF=
 * http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303>
 * http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303</A>
 *
 * @param charSetID       Specifies the charset ID
 * @param charset_return  Returns the corresponding Java charset name,
 *                        max 128 bytes
 * @see INTL_CharSetIDToJavaCharSetName
 */
PUBLIC void INTL_CharSetIDToJavaName(
    int16 charSetID,
    char *charset_return
);

/**
 * Returns the Java charset name corresponding to the given charset ID.
 *
 * The Java charset name is a name used in JDK 1.1 and up.
 * The Java name is defined in
 * <A HREF=
 * http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303>
 * http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303</A>
 *
 * @param charSetID  Specifies the charset ID
 * @return the corresponding Java charset name
 * @see INTL_CharSetIDToJavaName
 */
PUBLIC const char * PR_CALLBACK INTL_CharSetIDToJavaCharSetName(
    int16 charSetID
);

/**
 * Returns a pointer to the Java charset name corresponding to
 * the given charset ID.
 *
 * This function is similar to INTL_CharSetIDToJavaCharSetName. See
 * INTL_CharSetIDToJavaCharSetName for further details.
 *
 * @param charSetID  Specifies the charset ID
 * @return The corresponding Java charset name
 * @see INTL_CharSetIDToJavaCharSetName
 */
PUBLIC unsigned char *INTL_CsidToJavaCharsetNamePt(
    int16 charSetID
);
/*@}*/
/*=======================================================*/
/**@name Character Set Properties */
/*@{*/
/**
 * Returns whether or not auto-detection is available for the given charset ID.
 *
 * For example, this routine will return TRUE for any of the Japanese charset
 * IDs, since a Japanese auto-detection routine is available.
 *
 * @param csid  Specifies the charset ID
 * @return Whether or not auto-detection is available for the charset ID
 * @see INTL_GetCharCodeConverter
 */
PUBLIC XP_Bool INTL_CanAutoSelect(
    int16 csid
);

/**
 * Returns the charset type.
 *
 * Returns the type of the given charset ID. The charset types are defined in
 * csid.h.
 *
 * <UL>
 * <LI>SINGLEBYTE: single-byte charset (e.g. ISO-8859-1, MacRoman)
 * <LI>MULTIBYTE: multi-byte charset (e.g. Shift-JIS, Big5)
 * <LI>STATEFUL: stateful charset (e.g. ISO-2022-JP, UTF-7)
 * <LI>WIDECHAR: wide character charset (e.g. UCS-2, UCS-4)
 * </UL>
 *
 * @param charsetid Specifies the charset ID.
 * @return The charset type.
*/ #define INTL_CharSetType(charsetid) (charsetid & 0x700) /*@}*/ /*=======================================================*/ /**@name Finding Character Boundaries */ /*@{*/ /** * Returns the number of bytes in the given character. * * This function checks for zero bytes within the text, returning the actual * length even if the preceding byte(s) would normally indicate a longer * multibyte character. * * @param charSetID Specifies the charset ID of the text * @param pstr Specifies the 1st byte of the character * @return The number of bytes in the given character * @see INTL_IsLeadByte */ PUBLIC int INTL_CharLen( int charSetID, unsigned char *pstr ); /** * Returns number of bytes in given character, minus 1. * * This function returns the number of bytes in a character that starts with * the given byte, minus 1. I.e. for a single-byte character, it returns zero. * For a double-byte character, it returns 1. And so on. Hence, this function * returns a non-zero value if the given byte is the "lead byte" of a multibyte * character. * This function should not be confused with Windows API isleadbyte(). * * @param charSetID Specifies the charset ID of the text * @param ch Specifies the first byte of a character in the text * @return The number of bytes in the given character, minus 1 * @see INTL_CharLen */ PUBLIC int #ifdef NSPR20 PR_CALLBACK #endif INTL_IsLeadByte( int charSetID, unsigned char ch ); /** * Returns a pointer to the 1st byte of the next character. * * This function checks for zero bytes and returns pstr+1 if any are found, * even if the preceding byte(s) would normally indicate a longer character. * * @param charSetID Specifies the charset ID of the text * @param pstr Specifies the 1st byte of any previous character * @return The 1st byte of the next character * @see INTL_CharLen */ PUBLIC char *INTL_NextChar( int charSetID, char *pstr ); /** * Returns the number of the byte pointed to by the given position. 
*
 * Determines whether the byte at the given position is the 1st, 2nd, 3rd
 * or 4th byte of the character at that position. The pstr pointer must point
 * to the first byte of any preceding character in the string. The pos
 * position must be greater than zero, and is the index into pstr plus one.
 * I.e. the byte at pstr[0] has pos 1.
 *
 * If pos points to the only byte in a single-byte character, this function
 * returns zero. Otherwise, if pos points to the 1st byte, it returns 1. If
 * pos points to the 2nd byte, it returns 2. And so on.
 *
 * @param charSetID  Specifies the charset ID of the given text
 * @param pstr       Specifies the beginning of a character in the string
 * @param pos        Specifies the byte position within the string
 * @return The number of the byte at the given position
 * @see INTL_CharLen
 */
PUBLIC int INTL_NthByteOfChar(
    int charSetID,
    char *pstr,
    int pos
);

/**
 * Returns the byte index of the next character.
 *
 * Given the position of a character in some text, this function returns the
 * position of the next character.
 *
 * @param charSetID  Specifies the charset ID of the text
 * @param text       Specifies the beginning of the text
 * @param pos        Specifies the current position within the text
 * @return The position of the next character
 * @see INTL_PrevCharIdxInText
 */
PUBLIC int INTL_NextCharIdxInText(
    int16 charSetID,
    unsigned char *text,
    int pos
);

/**
 * Returns the byte index of the previous character.
 *
 * Given the position of a character in some text, this function returns the
 * position of the previous character.
 *
 * @param charSetID  Specifies the charset ID of the text
 * @param text       Specifies the beginning of the text
 * @param pos        Specifies the current position within the text
 * @return The position of the previous character
 * @see INTL_NextCharIdxInText
 */
PUBLIC int INTL_PrevCharIdxInText(
    int16 charSetID,
    unsigned char *text,
    int pos
);

/**
 * Converts a number of bytes to a number of characters.
 *
 * Given a number of bytes in a given string, this function determines the
 * number of characters.
 *
 * @param charSetID  Specifies the charset ID of the text
 * @param text       Specifies the text
 * @param byteCount  Specifies the number of bytes
 * @return The number of characters
 * @see INTL_TextCharLenToByteCount
 */
PUBLIC int32 INTL_TextByteCountToCharLen(
    int16 charSetID,
    unsigned char *text,
    uint32 byteCount
);

/**
 * Converts a number of characters to a number of bytes.
 *
 * Given a number of characters in a given string, this function determines the
 * number of bytes.
 *
 * @param charSetID  Specifies the charset ID of the text
 * @param text       Specifies the text
 * @param charLen    Specifies the number of characters
 * @return The number of bytes
 * @see INTL_TextByteCountToCharLen
 */
PUBLIC int32 INTL_TextCharLenToByteCount(
    int16 charSetID,
    unsigned char *text,
    uint32 charLen
);

/**
 * Returns the byte index of the next character.
 *
 * Given the position of any byte of any character in some text, this function
 * returns the position of the 1st byte of the next character. The
 * difference between this function and INTL_NextCharIdxInText is that this
 * function will accept the position of any byte of a character rather than
 * just the 1st byte of a character.
 *
 * @param charSetID  Specifies the charset ID of the text
 * @param str        Specifies the beginning of the text
 * @param pos        Specifies any byte of any character
 * @return The index of the next character
 * @see INTL_NextCharIdxInText, INTL_PrevCharIdx
 */
PUBLIC int INTL_NextCharIdx(
    int16 charSetID,
    unsigned char *str,
    int pos
);

/**
 * Returns the byte index of the previous character.
 *
 * Given the position of any byte of any character in some text, this function
 * returns the position of the 1st byte of the previous character. The
 * difference between this function and INTL_PrevCharIdxInText is that this
 * function will accept the position of any byte of a character rather than
 * just the 1st byte of a character.
 *
 * @param charSetID  Specifies the charset ID of the text
 * @param str        Specifies the beginning of the text
 * @param pos        Specifies any byte of any character
 * @return The index of the previous character
 * @see INTL_PrevCharIdxInText, INTL_NextCharIdx
 */
PUBLIC int INTL_PrevCharIdx(
    int16 charSetID,
    unsigned char *str,
    int pos
);
/*@}*/
/*=======================================================*/
/**@name Single-Byte Charset Conversion Tables (Obsolescent) */
/*@{*/
/**
 * Free a single-byte charset conversion table.
 *
 * This is not really a public function. However, ns/sun-java/awt/macos needs
 * it, so we have to put it here.
 *
 * @param cvthdl  Presumably the table handle returned by
 *                INTL_GetSingleByteTable -- confirm against the
 *                implementation.
 * @see INTL_GetSingleByteTable
 * @version DEPRECATED. Obsolescent. Use INTL_DestroyCharCodeConverter instead.
 */
MODULE_PRIVATE void INTL_FreeSingleByteTable(char **cvthdl);

/**
 * Get a single-byte charset conversion table.
 *
 * This is not really a public function. However, ns/sun-java/awt/macos needs
 * it, so we have to put it here.
 *
 * @see INTL_FreeSingleByteTable
 * @see INTL_LockTable
 * @version DEPRECATED. Obsolescent. Use INTL_GetCharCodeConverter instead.
 */
MODULE_PRIVATE char **INTL_GetSingleByteTable(
    int16 fromcsid,
    int16 tocsid,
    int32 func_ctx
);

/**
 * Lock the given single-byte charset conversion table in memory.
 *
 * This is not really a public function. However, ns/sun-java/awt/macos needs
 * it, so we have to put it here.
 *
 * @see INTL_GetSingleByteTable
 * @version DEPRECATED. Obsolescent. See INTL_GetSingleByteTable.
 */
MODULE_PRIVATE char *INTL_LockTable(char **cvthdl);
/*@}*/
/*=======================================================*/
/**@name HTTP Headers */
/*@{*/
/**
 * Return the AcceptLanguage preference.
 *
 * Get the HTTP Accept-Language header from preference settings.
 *
 * @return Accept-Language header (null-terminated string).
 * @see INTL_GetAcceptCharset
 */
PUBLIC char *INTL_GetAcceptLanguage(void);

/**
 * Return the AcceptCharset preference.
 *
 * Get the HTTP Accept-Charset header from preference settings.
* * @return Accept-Charset header (null-terminated string). * @see INTL_GetAcceptLanguage */ PUBLIC char *INTL_GetAcceptCharset(void); /*@}*/ /*=======================================================*/ /**@name Message Header Processing */ /*@{*/ /** * Decode and convert message header. * * This is a convenience macro that calls INTL_DecodeMimePartIIStr. It is * similar to INTL_DecodeMimePartIIStr, with the exception that it always * attempts to allocate a new buffer instead of returning the original input * buffer where the decoding/conversion may have been performed in place. * * @param r Returns the decoded/converted message header * @param b Specifies the message header * @param c Specifies the target window charset ID * @param f Specifies whether to convert the string into the wincsid or not * @return the decoded/converted message header (r) * @see INTL_DecodeMimePartIIStr */ #define INTL_DECODE_MIME_PART_II(r,b,c,f) \ (r = INTL_DecodeMimePartIIStr((b),(c),(f))), \ ((NULL!=r) && ((r)!=(b))) ? r : (r = XP_STRDUP(b)) /** * Decode and convert message header. * * If the message header contains an RFC 2047 encoded-word, that word is * decoded. Then it performs charset conversion if the dontConvert parameter is * false. Otherwise, it will only decode the string and return. The conversion * may happen later in the process. The flag is needed to work around a double * conversion problem. * * @param header Specifies the message string to be decoded/converted. * @param wincsid Specifies the target window charset ID. * @param dontConvert Specifies whether to convert the string into the wincsid * or not. If the value is true, then it will only decode * any RFC 2047 encoded-words, without converting their * charsets. If the value is false, then it will decode RFC * 2047 encoded-words AND convert them into the specified * wincsid. * @return Decoded and/or converted message header. 
If the return value is
 *         different from the input buffer, the caller must free the output
 *         buffer by calling XP_FREE when it is no longer needed.
 * @see INTL_DECODE_MIME_PART_II
 * @see INTL_EncodeMimePartIIStr
 * @see INTL_EncodeMimePartIIStr_VarLen
 */
PUBLIC char *INTL_DecodeMimePartIIStr(
    const char *header,
    int16 wincsid,
    XP_Bool dontConvert
);

/**
 * Convert and encode message header.
 *
 * Converts the string into an encoding used in Internet messages and encodes
 * it as per RFC 2047 (the successor of RFC 1522). It will (1) perform the
 * codeset conversion and (2) apply the RFC 2047 encoding algorithm (if
 * bUseMime is true or the internet message encoding is ISO-2022-KR or
 * ISO-2022-JP). This is a restricted version of
 * INTL_EncodeMimePartIIStr_VarLen which always uses 72 for encodedWordSize.
 *
 * @param header    Specifies the header string to be converted and encoded.
 * @param wincsid   Specifies the source encoding
 * @param bUseMime  Specifies whether to apply the RFC 2047 encoding or not.
 *                  If the value is true or the internet message encoding is
 *                  ISO-2022-JP or ISO-2022-KR, then it performs the RFC 2047
 *                  encoding after converting the text into the internet
 *                  message encoding. Otherwise, it only converts the text
 *                  into the internet message encoding.
 * @return the encoded/converted header. The caller needs to free this by
 *         calling XP_FREE when the result is no longer needed.
 * @see INTL_DecodeMimePartIIStr
 * @see INTL_EncodeMimePartIIStr_VarLen
 */
PUBLIC char *INTL_EncodeMimePartIIStr(
    char *header,
    int16 wincsid,
    XP_Bool bUseMime
);

/**
 * Convert and encode text into RFC 1522 header.
 *
 * Convert the string into the encoding used in internet message and encode
 * them into RFC 1522 form. It will (1) perform the codeset conversion and
 * (2) RFC 1522 encoding algorithm (if bUseMime is true or the internet message
 * encoding is ISO-2022-KR or ISO-2022-JP). It is same as
 * INTL_EncodeMimePartIIStr except it allow encodedWordSize value other than 72.
 *
 * @param header Specifies the RFC 1522 string to be encoded.
* @param wincsid Specifies the source encoding
 * @param bUseMime Specifies apply RFC 1522 rule or not. If the value
 *                 is true or the internet message encoding is
 *                 ISO-2022-JP or ISO-2022-KR, then it perform RFC1522
 *                 encoding after convert the text into the internet
 *                 message encoding. Otherwise, it only convert the
 *                 text into internet message encoding.
 * @param encodedWordSize Specifies the maximum length of encoded word.
 * @return the encoded/converted header. The caller need to free this by
 *         calling XP_FREE when the result is no longer needed.
 * @see INTL_DecodeMimePartIIStr
 * @see INTL_EncodeMimePartIIStr
 */
PUBLIC char *INTL_EncodeMimePartIIStr_VarLen(
    char * header,
    int16 wincsid,
    XP_Bool bUseMime,
    int encodedWordSize
);

/**
 * [OBSOLETE!!!] Use INTL_DecodeMimePartIIStr instead of this.
 * This macro is kept only until all the callers have been changed.
 * Please do not use it in new code.
 */
#define IntlDecodeMimePartIIStr INTL_DecodeMimePartIIStr

/**
 * [OBSOLETE!!!] Use INTL_EncodeMimePartIIStr instead of this.
 * This macro is kept only until all the callers have been changed.
 * Please do not use it in new code.
 */
#define IntlEncodeMimePartIIStr INTL_EncodeMimePartIIStr

/**
 * Set a private flag that records whether we are sending mail or news.
 *
 * A flag is used inside libi18n to remember whether we are sending mail or
 * news. This is because the mail encoding and the news encoding are
 * different for Korean.
 * Note that this should be used carefully since it depends on
 * the current mail/news implementation.
 * This is really a hack. It will be removed in the future.
 *
 * @param toNews  Boolean value to be set to the private flag.
 */
PUBLIC void INTL_MessageSendToNews(XP_Bool toNews);

/**
 * Convert a string from an RFC 1522 encoded header and normalize it, by
 * dropping the case of the characters.
 *
 * The return value could be used with INTL_StrContains, INTL_StrIs,
 * INTL_StrBeginWith or INTL_StrEndWith to perform string matching. This
 * function will normalize a string by dropping the case of characters
 * according to the csid the caller passed in. It will also ignore CR and LF
 * characters.
 *
 * @param csid           Specifies the encoding of rfc1522header
 * @param rfc1522header  Specifies the to-be-normalized string.
 * @return a normalized string which could be used in INTL_StrContains,
 *         INTL_StrIs, INTL_StrBeginWith and INTL_StrEndWith. The caller
 *         should free it by calling XP_FREE when it is not needed.
 * @see INTL_GetNormalizeStr
 * @see INTL_StrContains
 * @see INTL_StrIs
 * @see INTL_StrBeginWith
 * @see INTL_StrEndWith
 */
PUBLIC unsigned char* INTL_GetNormalizeStrFromRFC1522(
    int16 csid,
    unsigned char* rfc1522header
);
/*@}*/

/*=======================================================*/
/**@name Unicode (UCS-2) Strings */
/*@{*/
/**
 * Unicode character typedef.
 *
 * This is used to represent a 16-bit Unicode (UCS-2) character.
 */
typedef uint16 INTL_Unicode;

/**
 * Return the length of a Unicode string.
 *
 * The given Unicode string must be terminated by U+0000.
 *
 * @param ustr  Specifies the Unicode string
 * @return The length of ustr in UCS-2 units, not bytes
 */
PUBLIC uint32 INTL_UnicodeLen(INTL_Unicode *ustr);
/*@}*/

/*=======================================================*/
/**@name Compound Strings */
/*@{*/
/**
 * A typedef for encoding IDs (charset IDs).
 *
 * These are equivalent to charset IDs in the current code base.
 */
typedef uint16 INTL_Encoding_ID;

/*
 * Forward declaration so the struct can refer to itself by its typedef name.
 * See the comment on struct INTL_CompoundStr below.
 */
typedef struct INTL_CompoundStr INTL_CompoundStr;

/**
 * Compound String.
 *
 * A Compound String is constructed as a linked list. Each node has two fields
 * and a pointer to the next node. The two fields store a pointer to a
 * uniformly encoded piece of text and the encoding of that text.
 */
struct INTL_CompoundStr {
    /** The encoding of the text in this node. */
    INTL_Encoding_ID encoding;
    /** The uniformly encoded text. */
    unsigned char *text;
    /** A pointer to the next node. NULL if there are no more nodes. */
    INTL_CompoundStr *next;
};

/**
 * INTL_CompoundStrIterator should really be opaque, but we need to change the
 * callers first.
 */
typedef INTL_CompoundStr *INTL_CompoundStrIterator;

/**
 * Construct an INTL_CompoundStr, given some text and its encoding.
 *
 * Use this with INTL_CompoundStrCat to create multi-encoding
 * INTL_CompoundStrs.
 *
 * @param inencoding  Specifies the encoding of intext.
 * @param intext      Specifies the text to be stored. Null-terminated string.
 * @return INTL_CompoundStr. The caller should use INTL_CompoundStrDestroy to
 *         destroy it when it is no longer needed.
 * @see INTL_CompoundStrDestroy
 */
PUBLIC INTL_CompoundStr* INTL_CompoundStrFromStr(
    INTL_Encoding_ID inencoding,
    unsigned char* intext
);

/**
 * Convert the given Unicode string to an INTL_CompoundStr.
 *
 * This routine uses information provided by the front end through
 * INTL_SetUnicodeCSIDList. It converts from Unicode to substrings in the
 * encodings that the front end said were available (in the font system).
 *
 * @param inunicode  Specifies the Unicode text to be converted.
 * @param inlen      Specifies the length of inunicode in UCS-2 units,
 *                   not bytes.
 * @return INTL_CompoundStr. The caller should use INTL_CompoundStrDestroy to
 *         destroy it when it is no longer needed.
 * @see INTL_CompoundStrDestroy
 */
PUBLIC INTL_CompoundStr* INTL_CompoundStrFromUnicode(
    INTL_Unicode* inunicode,
    uint32 inlen
);

/**
 * Destroy an INTL_CompoundStr.
 *
 * This function destroys the INTL_CompoundStr created by
 * INTL_CompoundStrFromStr or INTL_CompoundStrFromUnicode.
 *
 * @param This  Specifies the INTL_CompoundStr to be destroyed.
 * @see INTL_CompoundStrFromStr
 * @see INTL_CompoundStrFromUnicode
 */
PUBLIC void INTL_CompoundStrDestroy(INTL_CompoundStr* This);

/**
 * Concatenate two INTL_CompoundStrs.
*
 * @param s1 Specifies the first INTL_CompoundStr and returns the
 *           concatenated INTL_CompoundStr
 * @param s2 Specifies the second INTL_CompoundStr
 * @see INTL_CompoundStrDestroy
 */
PUBLIC void INTL_CompoundStrCat(
    INTL_CompoundStr* s1,
    INTL_CompoundStr* s2
);

/**
 * Clone an INTL_CompoundStr.
 *
 * This function clones an INTL_CompoundStr.
 *
 * @param s1  Specifies the INTL_CompoundStr to be cloned
 * @return a cloned INTL_CompoundStr. The caller should use
 *         INTL_CompoundStrDestroy to destroy it when it is no longer needed.
 * @see INTL_CompoundStrDestroy
 */
PUBLIC INTL_CompoundStr* INTL_CompoundStrClone(INTL_CompoundStr* s1);

/**
 * Start iterating an INTL_CompoundStr.
 *
 * Initialize the iterating state and perform the first iteration of an
 * INTL_CompoundStr.
 *
 * @param This         Specifies the INTL_CompoundStr to be iterated
 * @param outencoding  Returns the encoding of the first node
 * @param outtext      Returns the text of the first node. The caller should
 *                     not free it.
 * @return INTL_CompoundStrIterator. The state of the iteration. Should be
 *         passed to INTL_CompoundStrNextStr. NULL if the iteration is
 *         finished.
 * @see INTL_CompoundStrNextStr
 */
PUBLIC INTL_CompoundStrIterator INTL_CompoundStrFirstStr(
    INTL_CompoundStr* This,
    INTL_Encoding_ID *outencoding,
    unsigned char** outtext
);

/**
 * Iterating INTL_CompoundStr.
 *
 * This function iterates through the INTL_CompoundStr for the given
 * INTL_CompoundStrIterator.
 *
 * @param iterator Specifies the INTL_CompoundStrIterator
 * @param outencoding Returns the encoding of the current node
 * @param outtext Returns the text of the current node. The caller should
 *                not free it.
 * @return INTL_CompoundStrIterator. The state of the iteration. Should be
 *         passed to INTL_CompoundStrNextStr. NULL if the iteration is
 *         finished.
* @see INTL_CompoundStrFirstStr */ PUBLIC INTL_CompoundStrIterator INTL_CompoundStrNextStr( INTL_CompoundStrIterator iterator, INTL_Encoding_ID *outencoding, unsigned char** outtext ); /*@}*/ /*=======================================================*/ /**@name Unicode Conversion */ /*@{*/ /** * An opaque data object used to iterate through Unicode text for * conversion to font encodings. * * See also the functions that use this object. * * @see INTL_UnicodeToStrIteratorCreate * @see INTL_UnicodeToStrIterate * @see INTL_UnicodeToStrIteratorDestroy * */ typedef void* INTL_UnicodeToStrIterator ; /** * Create an INTL_UnicodeToStrIterator and iterate through it once. * * This function creates an INTL_UnicodeToStrIterator and iterates through it * once to get the first element of Unicode text for font encoding conversion. * The function uses the prioritized Character Set ID list (CSIDList) to * decide which font encoding it will convert to. The iteration stops if the * whole Unicode string is converted. Otherwise, it continues iterating and * uses the next charset in the CSIDlist to convert the Unicode text. * * @param ustr Specifies Unicode string to be converted * @param ustrlen Specifies length of ustr in UCS-2 units not bytes * @param encoding Returns the encoding of the first element. * Returns 0 if there are no more to iterate. * @param dest Specifies the buffer for output and returns the * converted string for the first iteration * @param destbuflen Specifies the length of dest in bytes * @return Iterator which keeps the iteration state * @see INTL_GetUnicodeCSIDList * @see INTL_SetUnicodeCSIDList * @see INTL_UnicodeToStrIterate * @see INTL_UnicodeToStrIteratorDestroy * @see INTL_GetUnicodeCharsetList */ PUBLIC INTL_UnicodeToStrIterator INTL_UnicodeToStrIteratorCreate( INTL_Unicode* ustr, uint32 ustrlen, INTL_Encoding_ID *encoding, unsigned char* dest, uint32 destbuflen ); /** * Iterate through a Unicode object and convert to font encoding. 
*
 * Iterate the INTL_UnicodeToStrIterator to get Unicode to font encoding
 * conversion.
 *
 * @param iterator Specifies iterator that keeps the last iteration state
 * @param encoding Returns the encoding of the first element. Returns 0
 *                 if there are no more to iterate.
 * @param dest Specifies the buffer for output and returns the
 *             converted string for the current iteration
 * @param destbuflen Specifies the length of dest in bytes
 * @return 0 if there are no more elements to iterate.
 * @see INTL_GetUnicodeCSIDList
 * @see INTL_SetUnicodeCSIDList
 * @see INTL_UnicodeToStrIteratorCreate
 * @see INTL_UnicodeToStrIteratorDestroy
 * @see INTL_GetUnicodeCharsetList
 */
PUBLIC int INTL_UnicodeToStrIterate(
    INTL_UnicodeToStrIterator iterator,
    INTL_Encoding_ID *encoding,
    unsigned char* dest,
    uint32 destbuflen
);

/**
 * Destroy an INTL_UnicodeToStrIterator.
 *
 * This function destroys the INTL_UnicodeToStrIterator created by
 * INTL_UnicodeToStrIteratorCreate.
 *
 * @param iterator  Specifies the iterator to be destroyed
 * @see INTL_GetUnicodeCSIDList
 * @see INTL_SetUnicodeCSIDList
 * @see INTL_UnicodeToStrIteratorCreate
 * @see INTL_UnicodeToStrIterate
 * @see INTL_GetUnicodeCharsetList
 */
PUBLIC void INTL_UnicodeToStrIteratorDestroy(
    INTL_UnicodeToStrIterator iterator
);

/**
 * Return memory requirement for INTL_UnicodeToStr.
 *
 * Returns the maximum memory required for text converted from a Unicode
 * string to a specified encoding. Call this to prepare memory for
 * INTL_UnicodeToStr.
 *
 * @param encoding  Specifies the target encoding
 * @param ustr      Specifies the buffer containing UCS-2 data
 * @param ustrlen   Specifies the valid length of ustr in UCS-2 units,
 *                  not bytes
 * @return Number of bytes needed to store the converted result
 * @see INTL_UnicodeToStr
 */
PUBLIC uint32 INTL_UnicodeToStrLen(
    INTL_Encoding_ID encoding,
    INTL_Unicode* ustr,
    uint32 ustrlen
);

/**
 * Convert Unicode string to a specified encoding.
*
 * The caller needs to call INTL_UnicodeToStrLen first to prepare memory and
 * pass into dest.
 *
 * @param encoding Specifies the target encoding
 * @param ustr Specifies the buffer containing UCS-2 data
 * @param ustrlen Specifies the valid length of ustr in UCS-2 units
 *                not bytes
 * @param dest Specifies the buffer for the converted text and
 *             returns the converted text
 * @param destbuflen Specifies the size of dest in bytes
 * @see INTL_UnicodeToStrLen
 */
PUBLIC void INTL_UnicodeToStr(
    INTL_Encoding_ID encoding,
    INTL_Unicode* ustr,
    uint32 ustrlen,
    unsigned char* dest,
    uint32 destbuflen
);

/**
 * Convert Unicode to text in one encoding by trial and error.
 *
 * This routine tries to convert the given Unicode string into text of one
 * non-Unicode encoding. This is a trial and error function which may be
 * slow in "THE WORST CASE". However, it does its best in the best case and
 * average case.
 *
 * @param ustr        Specifies the buffer containing UCS-2 data
 * @param ustrlen     Specifies the valid length of ustr in UCS-2 units,
 *                    not bytes
 * @param dest        Specifies the buffer for the converted text and
 *                    returns the converted text
 * @param destbuflen  Specifies the size of dest (presumably in bytes, as for
 *                    INTL_UnicodeToStr; TODO confirm against the
 *                    implementation)
 * @return Encoding of the converted text
 */
PUBLIC INTL_Encoding_ID INTL_UnicodeToEncodingStr(
    INTL_Unicode* ustr,
    uint32 ustrlen,
    unsigned char* dest,
    uint32 destbuflen
);

/**
 * Return memory requirement for INTL_StrToUnicode.
 *
 * Return the maximum memory requirement for text converted from the
 * specified encoding to Unicode. Call this to prepare memory for
 * INTL_StrToUnicode. The difference from INTL_TextToUnicodeLen is that
 * the input is specified by a NULL terminated string.
 *
 * @param encoding  Specifies the encoding of text in src
 * @param src       Specifies the text to be converted
 * @return Size of Unicode to store the converted output (in
 *         UCS-2 units, not bytes)
 * @see INTL_StrToUnicode
 * @see INTL_TextToUnicodeLen
 */
PUBLIC uint32 INTL_StrToUnicodeLen(
    INTL_Encoding_ID encoding,
    unsigned char* src
);

/**
 * Convert non-Unicode text to Unicode.
* * The caller needs to call INTL_StrToUnicodeLen first to prepare memory and * pass into ustr. The difference between INTL_TextToUnicode is the input * string is specified by a NULL terminated string. * * @param encoding Specifies the encoding of text in src * @param src Specifies the text to be converted * @param ustr Specifies the buffer for Unicode and returns the converted * Unicode * @param ubuflen Specifies the size of the ustr in UCS-2 units not bytes * @return Size of the converted Unicode (in UCS-2 units not bytes) * @see INTL_StrToUnicodeLen * @see INTL_TextToUnicode */ PUBLIC uint32 INTL_StrToUnicode( INTL_Encoding_ID encoding, unsigned char* src, INTL_Unicode* ustr, uint32 ubuflen ); /** * Return memory requirement for INTL_TextToUnicode. * * Return the maximum memory requirement for text converted from a specified * encoding to Unicode . Call this to prepare memory for INTL_TextToUnicode. * The difference between INTL_StrToUnicodeLen is the input is not specified * by a NULL terminated string, but a pointer and length. * * @param encoding Specifies the encoding of text in src * @param src Specifies the text to be converted * @param srclen Specifies the number of bytes in src * @return Size of Unicode to store the converted output (in UCS-2 * units not bytes) * @see INTL_TextToUnicode * @see INTL_StrToUnicodeLen */ PUBLIC uint32 INTL_TextToUnicodeLen( INTL_Encoding_ID encoding, unsigned char* src, uint32 srclen ); /** * Convert text from non-Unicode to Unicode. * * The caller needs to call INTL_TextToUnicodeLen first to prepare memory and * pass into ustr. The difference between INTL_StrToUnicode is the input is * not specified by a NULL terminated string, but a pointer and length. 
*
 * @param encoding Specifies the encoding of text in src
 * @param src Specifies the text to be converted
 * @param srclen Specifies the number of bytes in src
 * @param ustr Specifies the buffer for the Unicode string and returns
 *             the converted Unicode string
 * @param ubuflen Specifies the size of the ustr in the UCS-2 units not
 *                bytes
 * @return Size of converted Unicode (in UCS-2 units not bytes)
 * @see INTL_TextToUnicodeLen
 * @see INTL_StrToUnicode
 */
PUBLIC uint32 INTL_TextToUnicode(
    INTL_Encoding_ID encoding,
    unsigned char* src,
    uint32 srclen,
    INTL_Unicode* ustr,
    uint32 ubuflen
);

/**
 * Initialize the Unicode conversion routines from a list of Character Set
 * IDs (CSIDs) for Unicode rendering.
 *
 * It should only be called once in the application life time. It should be
 * called by the front end before calling any other Unicode conversion
 * functions. The list can be retrieved through INTL_GetUnicodeCSIDList or
 * INTL_GetUnicodeCharsetList.
 *
 * @param numOfItems  Specifies the number of valid entries in csidlist
 * @param csidlist    Specifies a prioritized list of CSIDs to be used for
 *                    Unicode to font charset conversion. The function will
 *                    make a copy of the list the caller passes in. The caller
 *                    may free the passed-in list after this function returns.
 * @see INTL_GetUnicodeCSIDList
 * @see INTL_UnicodeToStrIteratorCreate
 * @see INTL_UnicodeToStrIterate
 * @see INTL_UnicodeToStrIteratorDestroy
 * @see INTL_GetUnicodeCharsetList
 */
PUBLIC void INTL_SetUnicodeCSIDList(
    uint16 numOfItems,
    int16 *csidlist);

/**
 * Returns a list of Character Set IDs (CSIDs) used for converting Unicode
 * to font encoding.
 *
 * The list is set at initialization time by the front end through
 * INTL_SetUnicodeCSIDList. The only difference between INTL_GetUnicodeCSIDList
 * and INTL_GetUnicodeCharsetList is that INTL_GetUnicodeCSIDList returns a
 * list of CSIDs and INTL_GetUnicodeCharsetList returns a list of charset
 * names (strings).
 *
 * @param outnum  Returns the number of items in the returned CSID array.
 * @return Array of CSIDs. Caller should not change or free the returned
 *         array. NOTE(review): the original text read "should change or
 *         free"; this is presumed to be a typo by analogy with
 *         INTL_GetUnicodeCharsetList -- confirm against the implementation.
 * @see INTL_SetUnicodeCSIDList
 * @see INTL_UnicodeToStrIteratorCreate
 * @see INTL_UnicodeToStrIterate
 * @see INTL_UnicodeToStrIteratorDestroy
 * @see INTL_GetUnicodeCharsetList
 */
PUBLIC int16* INTL_GetUnicodeCSIDList(int16 * outnum);

/**
 * Return a list of charset names (strings) used for converting Unicode to font
 * encoding.
 *
 * The list is set at initialization time by the front end through
 * INTL_SetUnicodeCSIDList. The only difference between INTL_GetUnicodeCSIDList
 * and INTL_GetUnicodeCharsetList is that INTL_GetUnicodeCSIDList returns a
 * list of CSIDs and INTL_GetUnicodeCharsetList returns a list of charset
 * names (strings).
 *
 * @param outnum  Returns the number of items in the returned charset array
 * @return Array of charset names. Caller should not change or free the
 *         returned array.
 * @see INTL_GetUnicodeCSIDList
 * @see INTL_SetUnicodeCSIDList
 * @see INTL_UnicodeToStrIteratorCreate
 * @see INTL_UnicodeToStrIterate
 * @see INTL_UnicodeToStrIteratorDestroy
 */
PUBLIC unsigned char **INTL_GetUnicodeCharsetList(int16 * outnum);

/**
 * Converts a UTF-8 sub-string to the appropriate font encoding.
 *
 * Converts characters until the encoding changes or
 * input/output space runs out.
 *
 * The segment is NOT NULL TERMINATED
 *
 * @param utf8p Specifies the UTF-8 string
 * @param utf8len Specifies the length of utf8p
 * @param LE_string Specifies and returns the (pre-allocated) buffer
 *                  for the string converted to the font encoding
 * @param LE_string_len Specifies the length of the buffer for LE_string
 * @param LE_written_len Returns the valid length of the return LE_string
 * @param LE_string_csid Returns the CSID of the return LE_string:
 *                       <UL>
 *                       <LI>
 *                       >0 if successful (valid CSID).
 *                       <LI>
 *                       -1 if not Unicode.
 *                       <LI>
 *                       -2 if no font encoding.
* </UL> * @return Length of converted UTF-8 string */ PUBLIC int utf8_to_local_encoding( const unsigned char *utf8p, const int utf8len, unsigned char *LE_string, int LE_string_len, int *LE_written_len, int16 *LE_string_csid ); /** * Convert text from UTF-8 to UCS-2 encoding. * * UCS-2 is the abbreviation for the two byte form of Unicode. * UTF-8 is a transformation encoding for Unicode. * For more information about UTF-8 look at RFC 2279 in * <A HREF=ftp://ds.internic.net/rfc/rfc2279.txt> * ftp://ds.internic.net/rfc/rfc2279.txt</A> . * For more information about UCS-2, look at <A HREF=http://www.unicode.org> * http://www.unicode.org</A>. * * @param utf8p Specifies the UTF-8 text buffer. It is NULL terminated. * @param num_chars Returns the length of the converted UCS-2 in UCS-2 units * not bytes * @return UCS-2 string, NULL terminated by U+0000, or NULL. The * caller should free it by calling XP_FREE when it is no * longer needed. * @see INTL_UCS2ToUTF8 */ PUBLIC UNICVTAPI uint16 *INTL_UTF8ToUCS2( const unsigned char *utf8p, int32 *num_chars ); /** * Convert text from UCS-2 to UTF-8 encoding. * * UCS-2 is the abbreviation for the two byte form of Unicode. * UTF-8 is a transformation encoding for Unicode. * For more information about UTF-8 look at RFC 2279 in * <A HREF=ftp://ds.internic.net/rfc/rfc2279.txt> * ftp://ds.internic.net/rfc/rfc2279.txt</A> . * For more information about UCS-2, look at <A HREF= http://www.unicode.org> * http://www.unicode.org</A>. * * @param ucs2p Specifies the UCS-2 text buffer * @param num_chars Specifies the length of ucs2p, in UCS-2 units not bytes * @return NULL terminated UTF-8 string or NULL. The caller should * free it by calling XP_FREE when it is no longer needed. * @see INTL_UTF8ToUCS2 */ PUBLIC UNICVTAPI unsigned char *INTL_UCS2ToUTF8( const uint16 *ucs2p, int32 num_chars ); /*@}*/ /*=======================================================*/ /**@name String Comparison */ /*@{*/ /** * Case insensitive comparison. 
* * This function is multibyte charset safe. It will consider characters * boundary correctly. It also ignore case by considering the charset * it used. * * @param charSetID Specifies the encoding of text1 and text2. * @param text1 Specifies address of text1. * @param text2 Specifies address of text2. * @param charlen Returns the length in byte of text1. * @return true if the text1 and text2 point to the same character, * ignoring the case, false otherwise. * @see INTL_MatchOneCaseChar * @see INTL_Strstr * @see INTL_Strcasestr */ PUBLIC XP_Bool INTL_MatchOneChar( int16 charSetID, unsigned char *text1, unsigned char *text2, int *charlen ); /** * Case sensitive comparison. * * This function is multibyte charset safe. It will consider characters * boundary correctly. * * @param charSetID Specifies the encoding of text1 and text2. * @param text1 Specifies address of text1. * @param text2 Specifies address of text2. * @param charlen Returns length in bytes of text1. * @return true if the text1 and text2 point to the same character (same case), * false otherwise. * @see INTL_MatchOneChar * @see INTL_Strstr * @see INTL_Strcasestr */ PUBLIC XP_Bool INTL_MatchOneCaseChar( int16 charSetID, unsigned char *text1, unsigned char *text2, int *charlen ); /** * Case sensitive sub-string search. * * This function is multibyte charset safe. It will consider characters * boundary correctly. * * @param charSetID Specifies the encoding of s1 and s2. * @param s1 Specifies the first string * @param s2 Specifies the second string * @return NULL if s1 does not contains s2, * otherwise, return the address of the sub-string in s1. * @see INTL_MatchOneChar * @see INTL_MatchOneCaseChar * @see INTL_Strcasestr */ PUBLIC char *INTL_Strstr( int16 charSetID, const char *s1, const char *s2 ); /** * Case insensitive sub-string search. * * This function is multibyte charset safe. It will consider characters * boundary correctly. It also ignore case by considering the charset it * used. 
*
 * @param charSetID Specifies the encoding of s1 and s2.
 * @param s1 Specifies the first string
 * @param s2 Specifies the second string
 * @return NULL if s1 does not contains s2,
 *         otherwise, return the address of the sub-string in s1.
 * @see INTL_MatchOneChar
 * @see INTL_MatchOneCaseChar
 * @see INTL_Strstr
 */
PUBLIC char *INTL_Strcasestr(
    int16 charSetID,
    const char *s1,
    const char *s2
);

/*
    Functions to support correct mail/news comparison:
        INTL_GetNormalizeStr
        INTL_GetNormalizeStrFromRFC1522
        INTL_StrContains
        INTL_StrIs
        INTL_StrBeginWith
        INTL_StrEndWith

    Example:
        XP_Bool MailHeaderContains(csid, header, str)
        {
            XP_Bool result = FALSE;
            unsigned char* n_str = INTL_GetNormalizeStr(csid, str);
            unsigned char* n_header = INTL_GetNormalizeStrFromRFC1522(csid, header);
            if((NULL != n_str) && (NULL != n_header))
                result = INTL_StrContains(csid, n_header, n_str);
            if(n_str)
                XP_FREE(n_str);
            if(n_header)
                XP_FREE(n_header);
            return result;
        }
*/

/**
 * Normalize a string, by dropping the case of the characters.
 *
 * The return value could be used with INTL_StrContains, INTL_StrIs,
 * INTL_StrBeginWith or INTL_StrEndWith to perform string matching. This
 * function normalizes a string by dropping the case of characters according
 * to the charSetID the caller passed in. It also ignores CR and LF characters.
 *
 * @param charSetID  Specifies the encoding of str
 * @param str        Specifies the to-be-normalized string.
 * @return a normalized string which could be used in INTL_StrContains,
 *         INTL_StrIs, INTL_StrBeginWith and INTL_StrEndWith. The caller
 *         should free it by calling XP_FREE when it is not needed.
 * @see INTL_GetNormalizeStrFromRFC1522
 * @see INTL_StrContains
 * @see INTL_StrIs
 * @see INTL_StrBeginWith
 * @see INTL_StrEndWith
 */
PUBLIC unsigned char* INTL_GetNormalizeStr(
    int16 charSetID,
    unsigned char* str
);

/**
 * Test if string str1 contains string str2.
 *
 * This function is multibyte charset safe. It will consider character
 * boundaries correctly. To do string matching that ignores the case of
 * characters, call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522)
 * before calling this function.
 *
 * @param charSetID  Specifies the encoding for str1 and str2.
 * @param str1       Specifies the first string
 * @param str2       Specifies the second string
 * @return true if str1 contains str2,
 *         false otherwise
 * @see INTL_GetNormalizeStr
 * @see INTL_GetNormalizeStrFromRFC1522
 * @see INTL_StrIs
 * @see INTL_StrBeginWith
 * @see INTL_StrEndWith
 */
PUBLIC XP_Bool INTL_StrContains(
    int16 charSetID,
    unsigned char* str1,
    unsigned char* str2
);

/**
 * Test if string str1 is equal to string str2.
 *
 * This function is multibyte charset safe. It will consider character
 * boundaries correctly. To do string matching that ignores the case of
 * characters, call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522)
 * before calling this function.
 *
 * @param charSetID  Specifies the encoding for str1 and str2.
 * @param str1       Specifies the first string
 * @param str2       Specifies the second string
 * @return true if the two strings are equal, false otherwise
 * @see INTL_GetNormalizeStr
 * @see INTL_GetNormalizeStrFromRFC1522
 * @see INTL_StrContains
 * @see INTL_StrBeginWith
 * @see INTL_StrEndWith
 */
PUBLIC XP_Bool INTL_StrIs(
    int16 charSetID,
    unsigned char* str1,
    unsigned char* str2
);

/**
 * Test if string str1 begins with string str2.
 *
 * This function is multibyte charset safe. It will consider character
 * boundaries correctly. To do string matching that ignores the case of
 * characters, call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522)
 * before calling this function.
 *
 * @param charSetID  Specifies the encoding for str1 and str2.
 * @param str1       Specifies the first string
 * @param str2       Specifies the second string
 * @return true if the first string begins with the second string,
 *         false otherwise
 * @see INTL_GetNormalizeStr
 * @see INTL_GetNormalizeStrFromRFC1522
 * @see INTL_StrContains
 * @see INTL_StrIs
 * @see INTL_StrEndWith
 */
PUBLIC XP_Bool INTL_StrBeginWith(
    int16 charSetID,
    unsigned char* str1,
    unsigned char* str2
);

/**
 * Test if string str1 ends with string str2.
 *
 * This function is multibyte charset safe. It will consider character
 * boundaries correctly. To do string matching that ignores the case of
 * characters, call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522)
 * before calling this function.
 *
 * @param charSetID  Specifies the encoding for str1 and str2.
 * @param str1       Specifies the first string
 * @param str2       Specifies the second string
 * @return true if the first string ends with the second string, false
 *         otherwise.
 * @see INTL_GetNormalizeStr
 * @see INTL_GetNormalizeStrFromRFC1522
 * @see INTL_StrContains
 * @see INTL_StrIs
 * @see INTL_StrBeginWith
 */
PUBLIC XP_Bool INTL_StrEndWith(
    int16 charSetID,
    unsigned char* str1,
    unsigned char* str2
);

/**
 * Return a (hacky) XPAT pattern for NNTP server for searching pre
 * RFC 1522 message header.
 *
 * This is a hacky function which try to work around another HACK!!! The
 * problem it tries to solve is to search on NNTP, internet newsgroup server.
 * Unfortunately, the NNTP server does not have non-ASCII text searching
 * command. The only functionality in the NNTP protocol we could use is the
 * XPAT extension of NNTP (see
 * <A HREF=ftp://ds.internic.net/internet-drafts/draft-ietf-nntpext-imp-01.txt>
 * ftp://ds.internic.net/internet-drafts/draft-ietf-nntpext-imp-01.txt</A> or
 * <A HREF=ftp://ds.internic.net/internet-drafts/draft-barber-nntp-imp-07.txt>
 * ftp://ds.internic.net/internet-drafts/draft-barber-nntp-imp-07.txt</A> ).
* XPAT use wildmat regular expression (see <A HREF= * http://oac.hsc.uth.tmc.edu/oac_sysadmin/services/INN/man/wildmat.3.html> * http://oac.hsc.uth.tmc.edu/oac_sysadmin/services/INN/man/wildmat.3.html</A> * for details) to provide string matching. Unfortunately, wildmat is not * designed to support non-ASCII text. It work for English header but not for * header in other language like Japanese, French, or German. The problem is * the XPAT/wildmat cannot deal with (1) ISO-2022-xx encoding nor (2) RFC 1522 * header. To work around the limitation in the protocol, we put together this * function to support the first limitation as possible as we can. This * function take one search string, and return a XPAT pattern which could then * be used to send to NNTP XPAT as search argument. However, there are some * limitation here. (1) It may cause NNTP return more message than it should, * the reason is the XPAT won't respect to the multibyte character boundary * when it try to match the string. To improve this in the future, the client * double check the header after it receive message from the server and narrow * it down to the correct case. (2) The pattern it generated won't match RFC * 1522 header so it could return less message than it should. This is because * there are more than one XPAT could match the sting in the case of RFC 1522 * header. To improve this in the future, the client side should send several * possible XPAT patterns (with the patterned return by this function), collect * the result, and then double checking in the client side. Of course, improve * the NNTP protocol itself is the real solution. But the improvement stated * above is also needed for the server support the current NNTP protocol. 
This * function (1) convert the text from the encoding the argument specified into * the encoding used in the corresponding internet newsgroup, (2) strip out * leading or trailing ISO-2022 escape sequence if present, (3) escape the * wildmat special characters (any characters which is not from 0-9, a-z, A-Z), * and return. * * @param winCharSetID Specifies the encoding of searchString. * @param searchString Specifies the string to be search through NNTP XPAT * command. * @return the pattern should be send to NNTP XPAT command for searching * non-ASCII header. The caller need to free this by calling XP_FREE * when the result is no longer needed. */ PUBLIC unsigned char* INTL_FormatNNTPXPATInNonRFC1522Format( int16 winCharSetID, unsigned char* searchString ); /*@}*/ /*=======================================================*/ /**@name Charset ID Iterator */ /*@{*/ /** * An object that can iterate through a list of charset ID. * * @see INTL_CSIDIteratorCreate * @see INTL_CSIDIteratorDestroy * @see INTL_CSIDIteratorNext */ typedef void* INTL_CSIDIterator; /** * Returns a new iterator object to search charset IDs for a particular * conversion. * * This function searches a built-in table to look for charset converters * that could be used for a particular purpose. The only purpose currently * supported is the IMAP4 conversion. This function puts the mail and news * charset IDs corresponding to the given charset ID at the top of the list * of IDs to try. After that, it inserts the "to" charset IDs of all entries * matching the given "from" ID. * * @param iterator_return Returns a new iterator object * @param charSetID Specifies the charset ID to convert from * @param flag Specifies the type of conversion * Currently, the only valid value is * csiditerate_TryIMAP4Search . 
* * @see INTL_CSIDIteratorNext, INTL_CSIDIteratorDestroy */ PUBLIC void INTL_CSIDIteratorCreate( INTL_CSIDIterator *iterator, int16 charSetID, int flag ); /** * Frees the given iterator, and sets given pointer to NULL. * * This function destroys the object created by INTL_CSIDIteratorCreate. * * @param iterator Specifies the iterator object to destroy * @see INTL_CSIDIteratorCreate */ PUBLIC void INTL_CSIDIteratorDestroy( INTL_CSIDIterator *iterator ); /** * Returns the next charset ID in the given iterator, if any. * * The return value is TRUE if a charset ID was found. The charset ID * is returned in pCharSetID. Otherwise, the return value is FALSE, and * pCharSetID remains untouched. * * @param iterator Specifies the iterator object * @param pCharSetID Returns the next charset ID * @return TRUE if there are more elements to be iterate, otherwise FALSE * @see INTL_CSIDIteratorCreate, INTL_CSIDIteratorDestroy */ PUBLIC XP_Bool INTL_CSIDIteratorNext( INTL_CSIDIterator *iterator, int16 *pCharSetID ); /*@}*/ /*=======================================================*/ /**@name Line/Word Breaking */ /*@{*/ /** * Line breaking information. * * <UL> * <LI> * PROHIBIT_NOWHERE - * It is a breakable character. It could be break before * or after this character. This class is for all * Kanji ideographic character. * <LI> * PROHIBIT_BEGIN_OF_LINE - * It should not appeared in the beginning of the line. * <LI> * PROHIBIT_END_OF_LINE - * It should not appeared in the end of the line. * <LI> * PROHIBIT_WORD_BREAK - * It is non breakable character. It cannot be break * if the next (or previous) character is also * PROHIBIT_WORD_BREAK. * </UL> * * @see INTL_KinsokuClass */ enum LINE_WRAP_PROHIBIT_CLASS{ PROHIBIT_NOWHERE, PROHIBIT_BEGIN_OF_LINE, PROHIBIT_END_OF_LINE, PROHIBIT_WORD_BREAK }; /** * Basic Japanese word breaking information. * * <UL> * <LI> * SEVEN_BIT_CHAR - e.g. ASCII * <LI> * HALFWIDTH_PRONOUNCE_CHAR - e.g. Japanese Katakana * <LI> * FULLWIDTH_ASCII_CHAR - e.g. 
ASCII in JIS * <LI> * FULLWIDTH_PRONOUNCE_CHAR - e.g. Japanese Hiragana, Katakana * <LI> * KANJI_CHAR - ideographic * <LI> * UNCLASSIFIED_CHAR - others * </UL> * * @see INTL_CharClass */ enum WORD_BREAK_CLASS{ SEVEN_BIT_CHAR, HALFWIDTH_PRONOUNCE_CHAR, FULLWIDTH_ASCII_CHAR, FULLWIDTH_PRONOUNCE_CHAR, KANJI_CHAR, UNCLASSIFIED_CHAR }; /** * Returns the code point that represent the non-breaking space character. * * The current implementation return the same value regardless of the given * charset. However, the return value is platform dependent. * The information then is used by parser and layout code. * * Using this function with caution as it is tied to * the current HTML parser implementation. * * @param winCharSetID Specifies the window charset id. * @return the code point which Non Breaking Space in a * C style NULL terminated string. * @see */ PUBLIC const char *INTL_NonBreakingSpace( uint16 winCharSetID ); /** * Returns information for basic Japanese word breaking. * * Given a character pointer and charset, returns a word breaking * character class for the given character. * It is necessary to pass a pointer because the * character may be more than one byte. * * In the future, the definition of word breaking classes needs to be * extended. * * @param winCharSetID Specifies the window charset ID * @param pstr Specifies the pointer to the character * @return Character class for word breaking: * <UL> * <LI> * SEVEN_BIT_CHAR - e.g. ASCII * <LI> * HALFWIDTH_PRONOUNCE_CHAR - e.g. Japanese Katakana * <LI> * FULLWIDTH_ASCII_CHAR - e.g. ASCII in JIS * <LI> * FULLWIDTH_PRONOUNCE_CHAR - e.g. Japanese Hiragana, Katakana * <LI> * KANJI_CHAR - ideographic * <LI> * UNCLASSIFIED_CHAR - others * </UL> * @see INTL_KinsokuClass * @see WORD_BREAK_CLASS */ PUBLIC int INTL_CharClass( int winCharSetID, unsigned char *pstr ); /** * Returns line breaking information. * * Given a character pointer and charset, returns a line breaking * character class for the given character. 
* It is necessary to pass a pointer because the * character may be more than one byte. * * * Please notice that the function currently only supports multibyte charsets. * If this is called for ascii charset, it always return PROHIBIT_WORD_BREAK. * * References for line breaking: * <UL> * <LI> * Japanese Standard Association, * JIS X 4501 1995 - Japanese Industrial Standard - * Line Composition rules for Japanese documents * <LI> * Ken Lunde, * Understanding Japanese Information Processing, * O'Reilly & Associates, Inc., * ISBN:1-56592-043-0, * pp.148 * <LI> * Nadine Kano, * Developing International Software For Windows 95 and Windows NT, * Microsoft Press, * ISBN:1-556-15-840-8, * pp.239-244 * </UL> * * * @param winCharSetID Specifies window charset ID. * @param pstr Specifies the pointer to the character * @return the kinsoku class for line breaking: * <UL> * <LI> * PROHIBIT_NOWHERE - * It is a breakable character. It could be break before * or after this character. This class is for all * Kanji ideographic character. * <LI> * PROHIBIT_BEGIN_OF_LINE - * It should not appeared in the beginning of the line. * <LI> * PROHIBIT_END_OF_LINE - * It should not appeared in the end of the line. * <LI> * PROHIBIT_WORD_BREAK - * It is non breakable character. It cannot be break * if the next (or previous) character is also * PROHIBIT_WORD_BREAK. * </UL> * @see INTL_CharClass * @see LINE_WRAP_PROHIBIT_CLASS */ PUBLIC int INTL_KinsokuClass( int16 winCharSetID, unsigned char *pstr ); /** * Returns the column width of the given character. * * In some countries, old terminals use full-width and half-width characters. * This function returns the number of "columns" taken up by the given * character. For example, in Japan, normal characters take up 2 columns, * while half-width characters take up 1 column each. * * Returns 1 for charsets that do not distinguish between half-width and * full-width characters. 
* * @param winCharSetID Specifies the charset ID of the text * @param pstr Specifies the character * @return The column width of the given character * @see INTL_IsHalfWidth */ PUBLIC int INTL_ColumnWidth( int winCharSetID, unsigned char *pstr ); /** * Truncates a long string by replacing excess characters in the middle * with "...". * * The output_return pointer may be the same as the input pointer. * * @param winCharSetID Specifies the charset ID of the text * @param input Specifies the text to be mid-truncated * @param output_return Returns the mid-truncated text * @param max_length Specifies the desired number of bytes to be placed in * the output buffer, minus 1 for null terminator */ PUBLIC void INTL_MidTruncateString( int16 winCharSetID, const char *input, char *output_return, int max_length ); /** * Returns whether or not the given character is a half-width character. * * In some countries, certain characters are normal width on old terminals, * while other characters are half-width. For example, normal Japanese * characters are considered normal width, while "hankaku kana" are * half-width, as are the ASCII characters. * * @param winCharSetID Specifies the charset ID of the text * @param pstr Specifies the character * @return * 0 if the given character is ASCII or the charset do not normally * distinguish between half-width and full-width, * 1 if the given character is half-width * @see INTL_ColumnWidth */ PUBLIC int INTL_IsHalfWidth( uint16 winCharSetID, unsigned char *pstr ); /*@}*/ /*=======================================================*/ /**@name Document Context Handling */ /*@{*/ /** * Request a re-layout of the document. * * Libi18n calls this function in those cases where a different document * encoding is detected after document conversion and layout has begun. 
* This can occur because the parsing and layout of the document begins * immediately when the document data begins to stream in - at which time * all the data needed to determine the charset may not be available. If * this occurs, the layout engine needs to be notified to pull the data from * the source (cache) again so the data will be converted by the correct * character codeset conversion module in the data stream. * * @param context Specifies the context which should be relayout again. */ PUBLIC void INTL_Relayout(iDocumentContext context); /** * Returns name of the document charset. * * The returned string is suitable for use in the window brought up by * View | Page Info (previously known as Document Info). It also provides * information such as whether this charset was auto-detected. * * @param doc_context Specifies the document context * @return Name (string) of the document charset */ PUBLIC char *INTL_CharSetDocInfo( iDocumentContext doc_context ); /** * Get the UI charset encoding setting. * * Gets the currently selected charset encoding for this document * (not the global default and not the detected document encoding). * * @param context Specifies document context * @return Document charset ID selected by the user * @see */ PUBLIC uint16 FE_DefaultDocCharSetID( iDocumentContext context ); /** * Change the default document charset ID. * * This function is currently only implemented and called by the Windows * platform. It will be removed in the future to keep the consistency between * platforms. * * @param defaultDocCharSetID Specifies the new default document charset ID * @version DEPRECATED. Do not use this function. */ #if defined(XP_WIN) || defined(XP_OS2) PUBLIC void INTL_ChangeDefaultCharSetID(int16 defaultDocCharSetID); #endif /** * Return default charset from preference or from current encoding * menu selection. * * @param context Specifies the context * @return Default document charset ID. 
If the context is NULL * then it returns default charset from the user preference. * If the context is specified then it returns current * encoding menu selection. */ PUBLIC int16 INTL_DefaultDocCharSetID(iDocumentContext context); /** * Returns the default window charset ID for the given document context. * * If context is NULL, or the context's window charset ID is zero, this * function calls INTL_DefaultWinCharSetID, passing the same context. * * @param context Specifies the document context * @return The default window charset ID for this document context * @see INTL_DefaultWinCharSetID */ PUBLIC int16 INTL_DefaultTextAttributeCharSetID( iDocumentContext context ); /** * Returns the default window charset ID for the given document context. * * If context is NULL, or if the context's window charset ID is zero, this * function calls INTL_DefaultDocCharSetID, passing the same context, and then * calls INTL_DocToWinCharSetID on the result. * * @param context Specifies the document context * @return Default window charset ID for this document context * @see INTL_DefaultDocCharSetID, INTL_DocToWinCharSetID */ PUBLIC int16 INTL_DefaultWinCharSetID( iDocumentContext context ); /** * Set up the charset conversion stream module. * * This function gets the charset info object from the context, and then * picks up the relayout flag and the document charset ID before calling * INTL_CSIInitialize. It then creates the appropriate charset converter * to convert from the document to window charset. The stream is set up * by setting the various function pointers (put, abort, complete, etc). * It then hooks up to the next stream module "INTERNAL_PARSER", the HTML * parser and layout engine. This is done by rewriting URL_s' content_type * field. 
* * @param format_out Specifies the type of stream * @param data_obj Ignored * @param URL_s Specifies the URL object * @param window_id Specifies the context * @return Stream object corresponding to this charset conversion module * @see INTL_CSIInitialize, NET_StreamBuilder */ PUBLIC Stream *INTL_ConvCharCode( int format_out, void *data_obj, URL *URL_s, iDocumentContext window_id ); /** * Converts mail charset to display charset used by current window. * * It decides which display charset to use based on current default language. * Caller is responsible for deallocating memory. * * @param context the context (window ID). * @param bit7buff Source buffer. * @param block_size the length of the source buffer. * @return Destination buffer. If NULL, this means either conversion failed or * did single-byte to single-byte conversion. */ PUBLIC unsigned char *INTL_ConvMailToWinCharCode( iDocumentContext context, unsigned char *bit7buff, uint32 block_size ); /*@}*/ /*=======================================================*/ /**@name Platform Independent String Resources */ /*@{*/ /** * Return the Charset name of the translated resource. * * @return MIME charset of the cross-platform string resource and FE * resources * @see XP_GetString * @see XP_GetStringForHTML */ PUBLIC char * INTL_ResourceCharSet(void); /*@}*/ /*=======================================================*/ XP_END_PROTOS #endif /* INTL_LIBI18N_H */