home *** CD-ROM | disk | FTP | other *** search
- /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- *
- * The contents of this file are subject to the Netscape Public License
- * Version 1.0 (the "NPL"); you may not use this file except in
- * compliance with the NPL. You may obtain a copy of the NPL at
- * http://www.mozilla.org/NPL/
- *
- * Software distributed under the NPL is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
- * for the specific language governing rights and limitations under the
- * NPL.
- *
- * The Initial Developer of this code under the NPL is Netscape
- * Communications Corporation. Portions created by Netscape are
- * Copyright (C) 1998 Netscape Communications Corporation. All Rights
- * Reserved.
- */
-
-
- /* libi18n.h */
-
-
- #ifndef INTL_LIBI18N_H
- #define INTL_LIBI18N_H
-
- #include "xp.h"
- #ifndef iDocumentContext
- #define iDocumentContext MWContext *
- #endif
- #define Stream NET_StreamClass
- #define URL URL_Struct
- #include "csid.h"
-
- #ifdef _UNICVT_DLL_ /* UNICVTAPI is non-empty only in this branch, and only on Win32 */
-
- #ifdef XP_WIN32
- #define UNICVTAPI __declspec(dllexport) /* export the decorated symbols from the DLL */
-
- #else
- #define UNICVTAPI /* expands to nothing when XP_WIN32 is not defined */
- #endif
-
- #else /* _UNICVT_DLL_ is undefined */
- #define UNICVTAPI /* expands to nothing */
- #endif
-
- /* Enum values for INTL_CSIDIteratorCreate (NOTE(review): that function is not declared in the visible part of this header -- confirm how the value is passed). */
- enum {
- csiditerate_TryIMAP4Search = 1
- };
-
-
-
- /*
- * To be called when backend catches charset info on <meta ... charset=...> tag.
- * This will force netlib to go get fresh data again either through cache or
- * network.
- *
- * NOTE(review): the text above reads like the description of a function, but
- * it sits directly on the anonymous enum below. The METACHARSET_* names
- * suggest states of handling a <meta> charset (none seen, charset seen,
- * relayout requested/forced/done) -- confirm against the code that uses them.
- */
- enum
- {
- METACHARSET_NONE = 0,
- METACHARSET_HASCHARSET,
- METACHARSET_REQUESTRELAYOUT,
- METACHARSET_FORCERELAYOUT,
- METACHARSET_RELAYOUTDONE
- };
-
- XP_BEGIN_PROTOS
-
-
- /*=======================================================*/
- /* Character Code Conversion (CCC).
- *
- *
- * CCCDataObject accessor functions are
- * built as a table to allow access from a DLL
- *
- * Note: new functions must be added at the end
- * or old apps using the new dll will fail
- */
- /**@name Character Code Conversion (CCC) */
- /*@{*/
-
- /**
- * Function Prototype for the codeset conversion function.
- *
- * @param obj Specifies the converter object
- * @param src Specifies the text to be converted
- * @param srclen Specifies the length of src
- * @return the converted text. The length of the converted result can be
- * accessed via INTL_GetCCCLen(obj)
- * @see INTL_GetCCCLen
- * @see INTL_SetCCCCvtfunc
- *
- */
- typedef unsigned char *(*CCCFunc)(CCCDataObject obj, const unsigned char * src, int32 srclen);
-
- /**
- * Function Prototype for the Report Auto Detect Result function.
- *
- * @param closure Specifies the closure that was associated with the converter
- * object by calling INTL_SetCCCReportAutoDetect
- * @param obj Specifies the converter object
- * @param doc_csid Specifies the auto-detected document csid
- * @see INTL_SetCCCReportAutoDetect
- *
- */
- typedef void (*CCCRADFunc)(void * closure, CCCDataObject obj, uint16 doc_csid);
-
- /**
- * Opaque converter object.
- *
- * This struct is an opaque converter object.
- */
- struct OpaqueCCCDataObject { /* WARNING: MUST MATCH REAL STRUCT */
- /** pointer to the converter object private functions struct */
- struct INTL_CCCFuncs *funcs_pointer;
- };
-
- /**
- * This structure holds the private functions of a conversion object.
- *
- * <B>WARNING: THIS STRUCT AND THE TABLE MUST BE IN SYNC WITH EACH OTHER </B>
- */
- struct INTL_CCCFuncs {
- /** The private function of INTL_SetCCCReportAutoDetect. */
- void (*set_report_autodetect)(CCCDataObject, CCCRADFunc, void*);
- /** The private function of INTL_CallCCCReportAutoDetect. */
- void (*call_report_autodetect)(CCCDataObject, uint16);
- /** The private function of INTL_SetCCCCvtfunc. */
- void (*set_cvtfunc)(CCCDataObject, CCCFunc);
- /** The private function of INTL_GetCCCCvtfunc. */
- CCCFunc (*get_cvtfunc)(CCCDataObject);
- /** The private function of INTL_SetCCCJismode. */
- void (*set_jismode)(CCCDataObject,int32);
- /** The private function of INTL_GetCCCJismode. */
- int32 (*get_jismode)(CCCDataObject);
- /** The private function of INTL_SetCCCCvtflag. */
- void (*set_cvtflag)(CCCDataObject,int32);
- /** The private function of INTL_GetCCCCvtflag. */
- int32 (*get_cvtflag)(CCCDataObject);
- /** The private function of INTL_GetCCCUncvtbuf. */
- unsigned char* (*get_uncvtbuf)(CCCDataObject);
- /** The private function of INTL_SetCCCDefaultCSID. */
- void (*set_default_doc_csid)(CCCDataObject, uint16);
- /** The private function of INTL_GetCCCDefaultCSID. */
- uint16 (*get_default_doc_csid)(CCCDataObject);
- /** The private function of INTL_SetCCCFromCSID. */
- void (*set_from_csid)(CCCDataObject, uint16);
- /** The private function of INTL_GetCCCFromCSID. */
- uint16 (*get_from_csid)(CCCDataObject);
- /** The private function of INTL_SetCCCToCSID. */
- void (*set_to_csid)(CCCDataObject, uint16);
- /** The private function of INTL_GetCCCToCSID. */
- uint16 (*get_to_csid)(CCCDataObject);
- /** The private function of INTL_SetCCCRetval. */
- void (*set_retval)(CCCDataObject, int);
- /** The private function of INTL_GetCCCRetval. */
- int (*get_retval)(CCCDataObject);
- /** The private function of INTL_SetCCCLen. */
- void (*set_len)(CCCDataObject, int32);
- /** The private function of INTL_GetCCCLen. */
- int32 (*get_len)(CCCDataObject);
- };
-
- /**
- * Create and initialize Character Code Converter Object.
- *
- * Creates and initializes a character code converter.
- * It also sets up a converter if a doc_csid is known (by DOC_CSID_KNOWN).
- * The caller is responsible for deallocating the allocated memory.
- *
- * @param c Pointer to an i18n private data structure.
- * @param default_doc_csid Default doc_csid to be used.
- * @return CCCDataObject Created character code converter object pointer.
- */
- PUBLIC CCCDataObject INTL_CreateDocumentCCC(
- INTL_CharSetInfo c,
- uint16 default_doc_csid
- );
-
- /**
- * Look for a converter from one charset to another.
- *
- * If the from_csid is CS_DEFAULT, this function uses the ID returned by
- * INTL_GetCCCDefaultCSID. If the to_csid is zero, this function uses the ID
- * returned by INTL_DocToWinCharSetID for the from_csid determined above.
- * If found, the converter function is stored in the given character code
- * conversion object.
- *
- * @param from_csid Specifies the charset ID to convert from
- * @param to_csid Specifies the charset ID to convert to
- * @param obj Specifies the character code converter object
- * @return 1 for success, 0 for failure
- * @see INTL_CreateCharCodeConverter, INTL_CallCharCodeConverter
- */
- PUBLIC int INTL_GetCharCodeConverter(
- int16 from_csid,
- int16 to_csid,
- CCCDataObject obj
- );
-
- /**
- * Set up charset internal data by meta charset.
- *
- * Given a charset name, this will set up i18n private charset info
- * which is obtained by a given context.
- * Input charset name should be obtained from HTML META tag.
- *
- * @param context Context to be set up.
- * @param charset_tag Charset name as an input (e.g. iso-8859-1).
- * @see INTL_CSIReportMetaCharsetTag
- */
- PUBLIC void INTL_CCCReportMetaCharsetTag(
- MWContext *context,
- char *charset_tag
- );
-
- /**
- * Passes some more text to the character code converter.
- *
- * The character code converter object keeps track of the current state as it
- * receives data to convert. If partial characters are received, they are
- * buffered until this function is called again.
- * INTL_GetCharCodeConverter must first be called before calling this function.
- *
- * In some cases, the text is converted in place (in the input buffer).
- *
- * @param obj Specifies the character code converter object
- * @param str Specifies the text to be converted
- * @param len Specifies the length in bytes of the text
- * @return The converted text, null terminated
- * @see INTL_GetCharCodeConverter
- */
- PUBLIC unsigned char *INTL_CallCharCodeConverter(
- CCCDataObject obj,
- const unsigned char *str,
- int32 len
- );
-
- /**
- * Initialize and set up a character code converter for a mail charset.
- *
- * Allocates memory for and initializes a character code converter.
- * The from/to charsets are determined by the given context, or by parsing the
- * source buffer in the case of HTML.
- * After the charsets are determined, it sets up a converter function.
- * The caller is responsible for deallocating the allocated memory.
- *
- * @param context Context to access charset info.
- * @param isHTML If TRUE then the input stream is parsed for meta tag.
- * @param buffer Source buffer.
- * @param buffer_size the length of the source buffer.
- * @return CCCDataObject Created character code converter object pointer.
- * @see INTL_CreateCharCodeConverter
- */
- PUBLIC CCCDataObject INTL_CreateDocToMailConverter(
- iDocumentContext context,
- XP_Bool isHTML,
- unsigned char *buffer,
- uint32 buffer_size
- );
-
- /**
- * Create a character code converter object used for codeset conversion.
- *
- * @return The new character code converter object
- * @see INTL_CreateDocumentCCC, INTL_GetCharCodeConverter,
- * INTL_DestroyCharCodeConverter
- * @deprecated Obsolescent. Please use INTL_CreateDocumentCCC.
- */
- PUBLIC CCCDataObject INTL_CreateCharCodeConverter(void);
-
- /**
- * Frees the given character code conversion object.
- *
- * This function destroys the code conversion object created by
- * INTL_CreateCharCodeConverter.
- *
- * @param obj Specifies the character code conversion object to free
- * @see INTL_CreateCharCodeConverter
- */
- PUBLIC void INTL_DestroyCharCodeConverter(
- CCCDataObject obj
- );
-
- /**
- * Converts a piece of text from one charset to another.
- *
- * This function does not do charset ID auto-detection. The caller must pass
- * the from/to charset IDs. This function does not keep state. Don't use it to
- * convert a stream of data. Only use this when you want to convert a string,
- * and you have no way to hold on to the converter object.
- *
- * If the string gets converted in place (use the input buffer), then this
- * function returns NULL.
- *
- * @param fromcsid Specifies the charset ID to convert from
- * @param tocsid Specifies the charset ID to convert to
- * @param pSrc Specifies the input text
- * @param block_size Specifies the number of bytes in the input text
- * @return The converted text, null terminated, or NULL if converted in place
- * @see INTL_CallCharCodeConverter
- */
- PUBLIC unsigned char *INTL_ConvertLineWithoutAutoDetect(
- int16 fromcsid,
- int16 tocsid,
- unsigned char *pSrc,
- uint32 block_size
- );
-
- /**
- * Returns the window charset ID corresponding to the given document charset ID.
- *
- * This function searches a built-in table to find the first entry that
- * matches the given document charset ID. If no such entry is found, it
- * returns CS_FE_ASCII.
- *
- * @param csid Specifies the document charset ID
- * @return The corresponding window charset ID
- */
- PUBLIC int16 INTL_DocToWinCharSetID(
- int16 csid
- );
-
- /**
- * Return the charset used in internet message from a specified charset.
- *
- * In the current implementation of Communicator, we assume there is a
- * many-to-one relationship between an encoding and the encoding used in
- * internet mail messages. This routine is used to get the outgoing encoding
- * for a specified encoding. The caller can then convert the text from the
- * specified encoding to the returned encoding before sending out the internet
- * message. Usually this mapping is the same as the one used for newsgroup
- * postings. However, for some regions/countries such as Korea, it is not the
- * same: different encodings are used in internet mail messages and newsgroup
- * postings. For newsgroup postings, INTL_DefaultNewsCharSetID should be used
- * instead.
- *
- * Issues: The current model assumes that text in a particular encoding is
- * always sent out in one encoding. This assumption breaks when people want to
- * send out messages in different Cyrillic, Chinese, or Unicode encodings.
- * Therefore, we may change this architecture in the near future.
- *
- * The mappings are:
- * <UL>
- * <LI>CS_ASCII: CS_ASCII
- * <LI>CS_LATIN1: CS_LATIN1
- * <LI>CS_JIS: CS_JIS
- * <LI>CS_SJIS: CS_JIS
- * <LI>CS_EUCJP: CS_JIS
- * <LI>CS_JIS_AUTO: CS_JIS
- * <LI>CS_SJIS_AUTO: CS_JIS
- * <LI>CS_EUCJP_AUTO: CS_JIS
- * <LI>CS_KSC_8BIT: CS_2022_KR [Note 1]
- * <LI>CS_KSC_8BIT_AUTO: CS_2022_KR [Note 1]
- * <LI>CS_GB_8BIT: CS_GB_8BIT
- * <LI>CS_BIG5: CS_BIG5
- * <LI>CS_CNS_8BIT: CS_BIG5
- * <LI>CS_MAC_ROMAN: CS_LATIN1
- * <LI>CS_LATIN2: CS_LATIN2
- * <LI>CS_MAC_CE: CS_LATIN2
- * <LI>CS_CP_1250: CS_LATIN2
- * <LI>CS_8859_5: CS_KOI8_R [Note 2]
- * <LI>CS_KOI8_R: CS_KOI8_R [Note 2]
- * <LI>CS_MAC_CYRILLIC: CS_KOI8_R [Note 2]
- * <LI>CS_CP_1251: CS_KOI8_R [Note 2]
- * <LI>CS_8859_7: CS_8859_7
- * <LI>CS_CP_1253: CS_8859_7
- * <LI>CS_MAC_GREEK: CS_8859_7
- * <LI>CS_8859_9: CS_8859_9
- * <LI>CS_MAC_TURKISH: CS_8859_9
- * <LI>CS_UTF8: CS_UTF7
- * <LI>CS_UTF7: CS_UTF7
- * <LI>CS_UCS2: CS_UTF7
- * <LI>CS_UCS2_SWAP: CS_UTF7
- * </UL>
- * Note:
- * <OL>
- * <LI>For INTL_DefaultNewsCharSetID, this value is different
- * <LI>The value is the one specified in preference
- * "intl.mailcharset.cyrillic". The default value is CS_KOI8_R. See
- * <A HREF=http://people.netscape.com/ftang/cyrillicmail.html>
- * http://people.netscape.com/ftang/cyrillicmail.html</A> for details.
- * </OL>
- *
- * @param csid Specifies the encoding
- * @return the encoding that should be used when sending the internet mail message.
- * @see INTL_DefaultNewsCharSetID
- */
- PUBLIC int16 INTL_DefaultMailCharSetID(int16 csid);
-
- /**
- * Return the charset used in internet newsgroup postings for a specified charset.
- *
- * In the current implementation of Communicator, we assume there is a
- * many-to-one relationship between an encoding and the encoding used in
- * internet newsgroup postings. This routine is used to get the outgoing
- * encoding for a specified encoding. The caller can then convert the text from
- * the specified encoding to the returned encoding before posting the message
- * to the newsgroup. Usually this mapping is the same as the one used for
- * internet mail messages. However, for some regions/countries such as Korea,
- * it is not the same: different encodings are used in internet mail messages
- * and newsgroup postings. For internet mail messages,
- * INTL_DefaultMailCharSetID should be used instead.
- *
- * Issues: The current model assumes that text in a particular encoding is
- * always sent out in one encoding. This assumption breaks when people want to
- * send out messages in different Cyrillic, Chinese, or Unicode encodings.
- * Therefore, we may change this architecture in the near future.
- *
- * The mappings are:
- * <UL>
- * <LI>ASCII: CS_ASCII
- * <LI>LATIN1: CS_LATIN1
- * <LI>JIS: CS_JIS
- * <LI>SJIS: CS_JIS
- * <LI>EUCJP: CS_JIS
- * <LI>JIS_AUTO: CS_JIS
- * <LI>SJIS_AUTO: CS_JIS
- * <LI>EUCJP_AUTO: CS_JIS
- * <LI>KSC_8BIT: CS_KSC_8BIT [Note 1]
- * <LI>KSC_8BIT_AUTO: CS_KSC_8BIT [Note 1]
- * <LI>GB_8BIT: CS_GB_8BIT
- * <LI>BIG5: CS_BIG5
- * <LI>CNS_8BIT: CS_BIG5
- * <LI>MAC_ROMAN: CS_LATIN1
- * <LI>LATIN2: CS_LATIN2
- * <LI>MAC_CE: CS_LATIN2
- * <LI>CP_1250: CS_LATIN2
- * <LI>8859_5: CS_KOI8_R [Note 2]
- * <LI>KOI8_R: CS_KOI8_R [Note 2]
- * <LI>MAC_CYRILLIC: CS_KOI8_R [Note 2]
- * <LI>CP_1251: CS_KOI8_R [Note 2]
- * <LI>8859_7: CS_8859_7
- * <LI>CP_1253: CS_8859_7
- * <LI>MAC_GREEK: CS_8859_7
- * <LI>8859_9: CS_8859_9
- * <LI>MAC_TURKISH: CS_8859_9
- * <LI>UTF8: CS_UTF7
- * <LI>UTF7: CS_UTF7
- * <LI>UCS2: CS_UTF7
- * <LI>UCS2_SWAP: CS_UTF7
- * </UL>
- * Note:
- * <OL>
- * <LI>For INTL_DefaultMailCharSetID, this value is different
- * <LI>The value is the one specified in preference
- * "intl.mailcharset.cyrillic". The default value is CS_KOI8_R. See
- * <A HREF=http://people.netscape.com/ftang/cyrillicmail.html>
- * http://people.netscape.com/ftang/cyrillicmail.html</A> for details.
- * </OL>
- *
- * @param csid Specifies the encoding
- * @return the encoding that should be used when posting to the internet newsgroup.
- * @see INTL_DefaultMailCharSetID
- */
- PUBLIC int16 INTL_DefaultNewsCharSetID(int16 csid);
-
- /**
- * Tell libi18n which font charset IDs are available in the front end.
- *
- * The front end (FE) calls this function to inform libi18n of the charset IDs
- * of the fonts that are currently available.
- *
- * This function calls INTL_SetUnicodeCSIDList to set up the Unicode
- * machinery.
- *
- * The front end must allocate space for this array using malloc/calloc. If
- * this function is called more than once, the array passed in a previous call
- * is freed by this function. However, the front end is responsible for
- * freeing the array at exit time.
- *
- * @param charsets Specifies a null-terminated array of charset IDs
- */
- PUBLIC void INTL_ReportFontCharSets(
- int16 *charsets
- );
-
/**
 * Get the "Unconverted Buffer" from the Converter Object.
 *
 * Expands to a call through the object's function table
 * (funcs_pointer->get_uncvtbuf). The obj argument is parenthesized, like in
 * the other accessor macros, so that any pointer-valued expression (for
 * example *pp) can be passed. Note that obj is evaluated twice, so it must
 * not have side effects.
 *
 * @param obj Specifies the converter object
 * @return the unconverted buffer in the converter object
 */
#define INTL_GetCCCUncvtbuf(obj) (((obj)->funcs_pointer->get_uncvtbuf)(obj))
-
- /**
- * Store the "conversion result length" in the converter object.
- *
- * Expands to a call through the object's function table
- * (funcs_pointer->set_len).
- *
- * @param obj Specifies the converter object
- * @param len Specifies the length of the current conversion result.
- * @see INTL_GetCCCLen
- */
- #define INTL_SetCCCLen(obj,len) ((obj)->funcs_pointer->set_len)((obj), (len))
-
- /**
- * Get the "conversion result length" from the converter object.
- *
- * @param obj Specifies the converter object
- * @return the length of conversion result stored in the converter object
- * @see INTL_SetCCCLen
- */
- #define INTL_GetCCCLen(obj) ((obj)->funcs_pointer->get_len)(obj)
-
- /**
- * Store the private flag "Jismode" in the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * The name "jismode" refers to the ISO 2022 state (JIS mode).
- * This is what the field was first used for.
- * It is now used for other purposes as well, so the name is no longer
- * appropriate.
- *
- * @param obj Specifies the converter object
- * @param jismode Specifies the Jismode
- * @see INTL_GetCCCJismode
- */
- #define INTL_SetCCCJismode(obj,jismode) \
- ((obj)->funcs_pointer->set_jismode)((obj), (jismode))
- /**
- * Get the private flag "Jismode" from the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * The name "jismode" refers to the ISO 2022 state (JIS mode).
- * This is what the field was first used for.
- * It is now used for other purposes as well, so the name is no longer
- * appropriate.
- *
- * @param obj Specifies the converter object
- * @return the Jismode stored in the converter object
- * @see INTL_SetCCCJismode
- */
- #define INTL_GetCCCJismode(obj) ((obj)->funcs_pointer->get_jismode)(obj)
-
- /**
- * Store the private flag "Cvtflag" in the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * @param obj Specifies the converter object
- * @param cvtflag Specifies the Cvtflag
- * @see INTL_GetCCCCvtflag
- */
- #define INTL_SetCCCCvtflag(obj,cvtflag) \
- ((obj)->funcs_pointer->set_cvtflag)((obj), (cvtflag))
- /**
- * Get the private flag "Cvtflag" from the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * @param obj Specifies the converter object
- * @return the Cvtflag stored in the converter object
- * @see INTL_SetCCCCvtflag
- */
- #define INTL_GetCCCCvtflag(obj) ((obj)->funcs_pointer->get_cvtflag)(obj)
-
- /**
- * Store the "Convert To CSID" in the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * @param obj Specifies the converter object
- * @param to_csid Specifies the Convert To CSID
- * @see INTL_GetCCCToCSID
- */
- #define INTL_SetCCCToCSID(obj,to_csid) \
- (((obj)->funcs_pointer->set_to_csid)((obj),(to_csid)))
- /**
- * Get the "Convert To CSID" from the converter object.
- *
- * @param obj Specifies the converter object
- * @return the "Convert To CSID" stored in the converter object
- * @see INTL_SetCCCToCSID
- */
- #define INTL_GetCCCToCSID(obj) (((obj)->funcs_pointer->get_to_csid)(obj))
-
- /**
- * Store the "Convert From CSID" in the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * @param obj Specifies the converter object
- * @param from_csid Specifies the Convert From CSID
- * @see INTL_GetCCCFromCSID
- */
- #define INTL_SetCCCFromCSID(obj,from_csid) \
- (((obj)->funcs_pointer->set_from_csid)((obj),(from_csid)))
- /**
- * Get the "Convert From CSID" from the converter object.
- *
- * @param obj Specifies the converter object
- * @return the "Convert From CSID" stored in the converter object
- * @see INTL_SetCCCFromCSID
- */
- #define INTL_GetCCCFromCSID(obj) (((obj)->funcs_pointer->get_from_csid)(obj))
-
- /**
- * Store the "Return Value" in the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * @param obj Specifies the converter object
- * @param retval Specifies the "Return Value"
- * @see INTL_GetCCCRetval
- */
- #define INTL_SetCCCRetval(obj,retval) \
- (((obj)->funcs_pointer->set_retval)((obj),(retval)))
- /**
- * Get the "Return Value" from the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * @param obj Specifies the converter object
- * @return the "Return Value" stored in the converter object
- * @see INTL_SetCCCRetval
- */
- #define INTL_GetCCCRetval(obj) (((obj)->funcs_pointer->get_retval)(obj))
-
- /**
- * Store the "Conversion Function" in the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * @param obj Specifies the converter object
- * @param func Specifies the "Conversion Function" (a CCCFunc) to store in
- * the converter object
- * @see INTL_GetCCCCvtfunc
- */
- #define INTL_SetCCCCvtfunc(obj,func) \
- (((obj)->funcs_pointer->set_cvtfunc)((obj),(func)))
-
- /**
- * Get the "Conversion Function" from the converter object.
- *
- * @param obj Specifies the converter object
- * @return the "Conversion Function" stored in the converter object
- * @see INTL_SetCCCCvtfunc
- */
- #define INTL_GetCCCCvtfunc(obj) ((obj)->funcs_pointer->get_cvtfunc)(obj)
-
- /**
- * Store the "Report Auto Detect Result Function" in the converter object.
- *
- * @param obj Specifies the converter object
- * @param func Specifies the "Auto Detect Result Reporting Function"
- * (a CCCRADFunc)
- * @param closure Specifies the closure which will be passed to the "Auto
- * Detect Result Reporting Function"
- * @see INTL_CallCCCReportAutoDetect
- */
- #define INTL_SetCCCReportAutoDetect(obj,func,closure) \
- (((obj)->funcs_pointer->set_report_autodetect)((obj), (func), (closure)))
-
- /**
- * Call the "Report Auto Detect Result Function" associated with the
- * converter object.
- *
- * @param obj Specifies the converter object
- * @param doc_csid Specifies the document csid that was auto-detected
- * @see INTL_SetCCCReportAutoDetect
- */
- #define INTL_CallCCCReportAutoDetect(obj,doc_csid) \
- (((obj)->funcs_pointer->call_report_autodetect)((obj), (doc_csid)))
-
- /**
- * Store the "Default Document CSID" in the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * @param obj Specifies the converter object
- * @param default_doc_csid Specifies the Default Document CSID
- * @see INTL_GetCCCDefaultCSID
- */
- #define INTL_SetCCCDefaultCSID(obj,default_doc_csid) \
- ((obj)->funcs_pointer->set_default_doc_csid)((obj), (default_doc_csid))
-
- /**
- * Get the "Default Document CSID" from the converter object.
- *
- * There is no reason for any code outside libi18n to call this.
- * We are considering moving this into intlpriv.h.
- * Don't call this macro unless you are changing libi18n.
- *
- * @param obj Specifies the converter object
- * @return the Default Document CSID stored in the converter object
- * @see INTL_SetCCCDefaultCSID
- */
- #define INTL_GetCCCDefaultCSID(obj) \
- (((obj)->funcs_pointer->get_default_doc_csid)(obj))
-
- /*@}*/
- /*=======================================================*/
- /**@name CharSetID and Charset Name Mapping */
- /*@{*/
- /**
- * Returns the preferred MIME charset name corresponding to the given
- * charset ID.
- *
- * Charset names are registered by IANA (Internet Assigned Numbers Authority).
- * The current charset name database can be found at:
- *
- * <A HREF=ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets>
- * ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets</A>.
- *
- * This function returns the charset name for the given Character Set ID
- * which in most cases corresponds to the "(preferred MIME name)" registered
- * with IANA. This function may return private names not found in the
- * registry. Private names start with "x-". See INTL_CharSetNameToID for
- * information about charset IDs.
- *
- * @param charSetID Specifies the charset ID
- * @param charset_return Returns the corresponding charset name, max 128 bytes
- * @see INTL_CharSetNameToID
- */
- PUBLIC void INTL_CharSetIDToName(
- int16 charSetID,
- char *charset_return
- );
-
- /**
- * Returns the charset ID corresponding to the given charset name.
- *
- * The charset ID is a private 16-bit integer, described in
- * ns/include/csid.h. If the given charset is unknown, CS_UNKNOWN is returned.
- * If the given charset is NULL, CS_DEFAULT is returned. Charset names are not
- * case-sensitive. See INTL_CharSetIDToName for a description of charset names.
- *
- * @param charset Specifies the charset name
- * @return the corresponding charset ID
- * @see INTL_CharSetIDToName
- */
- PUBLIC int16 INTL_CharSetNameToID(
- char *charset
- );
-
- /**
- * Returns a pointer to the preferred MIME charset name corresponding
- * to the given charset ID.
- *
- * This function is similar to INTL_CharSetIDToName. It returns a pointer to
- * the charset name. See INTL_CharSetIDToName for other details.
- *
- * @param charSetID Specifies the charset ID
- * @return The corresponding charset name
- * @see INTL_CharSetIDToName
- */
- PUBLIC unsigned char *INTL_CsidToCharsetNamePt(
- int16 charSetID
- );
-
- /**
- * Returns the Java charset name corresponding to the given charset ID.
- *
- * The Java charset name is one that JDK 1.1 and up will understand.
- * The Java name is defined in
- * <A HREF=
- * http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303>
- * http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303</A>
- *
- * @param charSetID Specifies the charset ID
- * @param charset_return Returns the corresponding Java charset name,
- * max 128 bytes
- * @see INTL_CharSetIDToJavaCharSetName
- */
- PUBLIC void INTL_CharSetIDToJavaName(
- int16 charSetID,
- char *charset_return
- );
-
- /**
- * Returns the Java charset name corresponding to the given charset ID.
- *
- * The Java charset name is a name used in JDK 1.1 and up.
- * The Java name is defined in
- * <A HREF=
- * http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303>
- * http://java.sun.com/products/jdk/1.1/docs/guide/intl/intl.doc.html#25303</A>
- *
- * @param charSetID Specifies the charset ID
- * @return the corresponding Java charset name
- * @see INTL_CharSetIDToJavaName
- */
- PUBLIC const char * PR_CALLBACK INTL_CharSetIDToJavaCharSetName(
- int16 charSetID
- );
-
- /**
- * Returns a pointer to the Java charset name corresponding to
- * the given charset ID.
- *
- * This function is similar to INTL_CharSetIDToJavaCharSetName. See
- * INTL_CharSetIDToJavaCharSetName for further details.
- *
- * @param charSetID Specifies the charset ID
- * @return The corresponding Java charset name
- * @see INTL_CharSetIDToJavaCharSetName
- */
- PUBLIC unsigned char *INTL_CsidToJavaCharsetNamePt(
- int16 charSetID
- );
-
- /*@}*/
- /*=======================================================*/
- /**@name Character Set Properties */
- /*@{*/
-
- /**
- * Returns whether or not auto-detection is available for the given charset ID.
- *
- * For example, this routine will return TRUE for any of the Japanese charset
- * IDs, since a Japanese auto-detection routine is available.
- *
- * @param csid Specifies the charset ID
- * @return Whether or not auto-detection is available for the charset ID
- * @see INTL_GetCharCodeConverter
- */
- PUBLIC XP_Bool INTL_CanAutoSelect(
- int16 csid
- );
-
/**
 * Returns the charset type.
 *
 * Returns the type of the given charset ID. The charset types are defined in
 * csid.h.
 *
 * <UL>
 * <LI>SINGLEBYTE: single-byte charset (e.g. ISO-8859-1, MacRoman)
 * <LI>MULTIBYTE: multi-byte charset (e.g. Shift-JIS, Big5)
 * <LI>STATEFUL: stateful charset (e.g. ISO-2022-JP, UTF-7)
 * <LI>WIDECHAR: wide character charset (e.g. UCS-2, UCS-4)
 * </UL>
 *
 * The type is extracted by masking the ID with 0x700. The argument is
 * parenthesized so that expressions with lower precedence than '&'
 * (for example a | b, or c ? x : y) are masked as a whole.
 *
 * @param charsetid Specifies the charset ID.
 * @return The charset type.
 */
#define INTL_CharSetType(charsetid) ((charsetid) & 0x700)
-
- /*@}*/
- /*=======================================================*/
- /**@name Finding Character Boundaries */
- /*@{*/
-
- /**
- * Returns the number of bytes in the given character.
- *
- * This function checks for zero bytes within the text, returning the actual
- * length even if the preceding byte(s) would normally indicate a longer
- * multibyte character.
- *
- * @param charSetID Specifies the charset ID of the text
- * @param pstr Specifies the 1st byte of the character
- * @return The number of bytes in the given character
- * @see INTL_IsLeadByte
- */
- PUBLIC int INTL_CharLen(
- int charSetID,
- unsigned char *pstr
- );
-
- /**
- * Returns number of bytes in given character, minus 1.
- *
- * This function returns the number of bytes in a character that starts with
- * the given byte, minus 1. I.e. for a single-byte character, it returns zero.
- * For a double-byte character, it returns 1. And so on. Hence, this function
- * returns a non-zero value if the given byte is the "lead byte" of a multibyte
- * character.
- * This function should not be confused with Windows API isleadbyte().
- *
- * @param charSetID Specifies the charset ID of the text
- * @param ch Specifies the first byte of a character in the text
- * @return The number of bytes in the given character, minus 1
- * @see INTL_CharLen
- */
- PUBLIC int
- #ifdef NSPR20
- PR_CALLBACK
- #endif
- INTL_IsLeadByte(
- int charSetID,
- unsigned char ch
- );
-
- /**
- * Returns a pointer to the 1st byte of the next character.
- *
- * This function checks for zero bytes and returns pstr+1 if any are found,
- * even if the preceding byte(s) would normally indicate a longer character.
- *
- * @param charSetID Specifies the charset ID of the text
- * @param pstr Specifies the 1st byte of any previous character
- * @return The 1st byte of the next character
- * @see INTL_CharLen
- */
- PUBLIC char *INTL_NextChar(
- int charSetID,
- char *pstr
- );
-
- /**
- * Returns the number of the byte pointed to by the given position.
- *
- * Determines whether the byte at the given position is the 1st, 2nd, 3rd
- * or 4th byte of the character at that position. The pstr pointer must point
- * to the first byte of any preceding character in the string. The pos
- * position must be greater than zero, and is the index into pstr plus one.
- * I.e. the byte at pstr[0] has pos 1.
- *
- * If pos points to the only byte in a single-byte character, this function
- * returns zero. Otherwise, if pos points to the 1st byte, it returns 1. If
- * pos points to the 2nd byte, it returns 2. And so on.
- *
- * @param charSetID Specifies the charset ID of the given text
- * @param pstr Specifies the beginning of a character in the string
- * @param pos Specifies the byte position within the string
- * @return The number of the byte at the given position
- * @see INTL_CharLen
- */
- PUBLIC int INTL_NthByteOfChar(
- int charSetID,
- char *pstr,
- int pos
- );
-
- /**
- * Returns the byte index of the next character.
- *
- * Given the position of a character in some text, this function returns the
- * position of the next character.
- *
- * @param charSetID Specifies the charset ID of the text
- * @param text Specifies the beginning of the text
- * @param pos Specifies the current position within the text
- * @return The position of the next character
- * @see INTL_PrevCharIdxInText
- */
- PUBLIC int INTL_NextCharIdxInText(
- int16 charSetID,
- unsigned char *text,
- int pos
- );
-
- /**
- * Returns the byte index of the previous character.
- *
- * Given the position of a character in some text, this function returns the
- * position of the previous character.
- *
- * @param charSetID Specifies the charset ID of the text
- * @param text Specifies the beginning of the text
- * @param pos Specifies the current position within the text
- * @return The position of the previous character
- * @see INTL_NextCharIdxInText
- */
- PUBLIC int INTL_PrevCharIdxInText(
- int16 charSetID,
- unsigned char *text,
- int pos
- );
-
-
- /**
- * Convert number of bytes to number of characters.
- *
- * Given a number of bytes in a given string, this function determines the
- * number of characters.
- *
- * @param charSetID Specifies the charset ID of the text
- * @param text Specifies the text
- * @param byteCount Specifies the number of bytes
- * @return The number of characters
- * @see INTL_TextCharLenToByteCount
- */
- PUBLIC int32 INTL_TextByteCountToCharLen(
- int16 charSetID,
- unsigned char *text,
- uint32 byteCount
- );
-
- /**
- * Convert number of characters to number of bytes.
- *
- * Given a number of characters in a given string, this function determines the
- * number of bytes.
- *
- * @param charSetID Specifies the charset ID of the text
- * @param text Specifies the text
- * @param charLen Specifies the number of characters
- * @return The number of bytes
- * @see INTL_TextByteCountToCharLen
- */
- PUBLIC int32 INTL_TextCharLenToByteCount(
- int16 charSetID,
- unsigned char *text,
- uint32 charLen
- );
-
-
- /**
- * Returns the byte index of the next character.
- *
- * Given the position of any byte of any character in some text, this function
- * returns the position of the 1st byte of the next character. The
- * difference between this function and INTL_NextCharIdxInText is that this
- * function will accept the position of any byte of a character rather than
- * just the 1st byte of a character.
- *
- * @param charSetID Specifies the charset ID of the text
- * @param str Specifies the beginning of the text
- * @param pos Specifies any byte of any character
- * @return The index of the next character
- * @see INTL_NextCharIdxInText, INTL_PrevCharIdx
- */
- PUBLIC int INTL_NextCharIdx(
- int16 charSetID,
- unsigned char *str,
- int pos
- );
-
- /**
- * Returns the byte index of the previous character.
- *
- * Given the position of any byte of any character in some text, this function
- * returns the position of the 1st byte of the previous character. The
- * difference between this function and INTL_PrevCharIdxInText is that this
- * function will accept the position of any byte of a character rather than
- * just the 1st byte of a character.
- *
- * @param charSetID Specifies the charset ID of the text
- * @param str Specifies the beginning of the text
- * @param pos Specifies any byte of any character
- * @return The index of the previous character
- * @see INTL_PrevCharIdxInText, INTL_NextCharIdx
- */
- PUBLIC int INTL_PrevCharIdx(
- int16 charSetID,
- unsigned char *str,
- int pos
- );
-
- /*@}*/
- /*=======================================================*/
- /**@name Single-Byte Charset Conversion Tables (Obsolescent) */
- /*@{*/
-
- /**
- * Free a single-byte charset conversion table.
- *
- * This is not really a public function. However, ns/sun-java/awt/macos needs
- * it, so we have to put it here.
- *
- * @see INTL_GetSingleByteTable
- * @version DEPRECATED. Obsolescent. Use INTL_DestroyCharCodeConverter instead.
- */
- MODULE_PRIVATE void INTL_FreeSingleByteTable(char **cvthdl);
-
- /**
- * Get a single-byte charset conversion table.
- *
- * This is not really a public function. However, ns/sun-java/awt/macos needs
- * it, so we have to put it here.
- *
- * @see INTL_FreeSingleByteTable
- * @see INTL_LockTable
- * @version DEPRECATED. Obsolescent. Use INTL_GetCharCodeConverter instead.
- */
- MODULE_PRIVATE char **INTL_GetSingleByteTable(
- int16 fromcsid,
- int16 tocsid,
- int32 func_ctx
- );
-
- /**
- * Lock the given single-byte charset conversion table in memory.
- *
- * This is not really a public function. However, ns/sun-java/awt/macos needs
- * it, so we have to put it here.
- *
- * @see INTL_GetSingleByteTable
- * @version DEPRECATED. Obsolescent. See INTL_GetSingleByteTable.
- */
- MODULE_PRIVATE char *INTL_LockTable(char **cvthdl);
-
- /*@}*/
- /*=======================================================*/
- /**@name HTTP Headers */
- /*@{*/
-
- /**
- * Return the AcceptLanguage preference.
- *
- * Get the HTTP Accept-Language header from preference settings.
- *
- * @return Accept-Language header (null-terminated string).
- * @see INTL_GetAcceptCharset
- */
- PUBLIC char *INTL_GetAcceptLanguage(void);
-
- /**
- * Return the AcceptCharset preference.
- *
- * Get the HTTP Accept-Charset header from preference settings.
- *
- * @return Accept-Charset header (null-terminated string).
- * @see INTL_GetAcceptLanguage
- */
- PUBLIC char *INTL_GetAcceptCharset(void);
-
- /*@}*/
- /*=======================================================*/
- /**@name Message Header Processing */
- /*@{*/
-
- /**
- * Decode and convert message header.
- *
- * This is a convenience macro that calls INTL_DecodeMimePartIIStr. It is
- * similar to INTL_DecodeMimePartIIStr, with the exception that it always
- * attempts to allocate a new buffer instead of returning the original input
- * buffer where the decoding/conversion may have been performed in place:
- * when INTL_DecodeMimePartIIStr returns NULL or the input buffer itself,
- * r is set to XP_STRDUP(b) instead.
- *
- * The expansion is a single fully parenthesized expression, so it can be
- * used safely as a statement, on the right-hand side of an assignment, or
- * as a function argument.
- *
- * NOTE: b is evaluated more than once, so it must not have side effects.
- * r must be a modifiable lvalue.
- *
- * @param r Returns the decoded/converted message header
- * @param b Specifies the message header
- * @param c Specifies the target window charset ID
- * @param f Passed as the dontConvert argument of INTL_DecodeMimePartIIStr:
- *          if true, the header is only decoded; if false, it is decoded
- *          AND converted into the charset given by c
- * @return the decoded/converted message header (r)
- * @see INTL_DecodeMimePartIIStr
- */
- #define INTL_DECODE_MIME_PART_II(r,b,c,f) \
- (((r) = INTL_DecodeMimePartIIStr((b),(c),(f))), \
- (((NULL!=(r)) && ((r)!=(b))) ? (r) : ((r) = XP_STRDUP(b))))
-
- /**
- * Decode and convert message header.
- *
- * If the message header contains an RFC 2047 encoded-word, that word is
- * decoded. Then it performs charset conversion if the dontConvert parameter is
- * false. Otherwise, it will only decode the string and return. The conversion
- * may happen later in the process. The flag is needed to work around a double
- * conversion problem.
- *
- * @param header Specifies the message string to be decoded/converted.
- * @param wincsid Specifies the target window charset ID.
- * @param dontConvert Specifies whether to convert the string into the wincsid
- * or not. If the value is true, then it will only decode
- * any RFC 2047 encoded-words, without converting their
- * charsets. If the value is false, then it will decode RFC
- * 2047 encoded-words AND convert them into the specified
- * wincsid.
- * @return Decoded and/or converted message header. If the return value is
- * different from the input buffer, the caller must free the output
- * buffer by calling XP_FREE when it is no longer needed.
- * @see INTL_DECODE_MIME_PART_II
- * @see INTL_EncodeMimePartIIStr
- * @see INTL_EncodeMimePartIIStr_VarLen
- */
- PUBLIC char *INTL_DecodeMimePartIIStr(
- const char *header,
- int16 wincsid,
- XP_Bool dontConvert
- );
-
- /**
- * Convert and encode message header.
- *
- * Converts the string into an encoding used in Internet messages and encodes
- * it as per RFC 2047. It will (1) perform the codeset conversion and
- * (2) apply the RFC 1522 encoding algorithm (if bUseMime is true or the
- * internet message encoding is ISO-2022-KR or ISO-2022-JP). This is a
- * restricted version of INTL_EncodeMimePartIIStr_VarLen which always uses 72
- * for encodedWordSize.
- *
- * @param header Specifies the string to be converted and encoded.
- * @param wincsid Specifies the source encoding
- * @param bUseMime Specifies whether to apply the RFC 1522 rule or not. If
- * the value is true or the internet message encoding is
- * ISO-2022-JP or ISO-2022-KR, then it performs RFC 1522
- * encoding after converting the text into the internet
- * message encoding. Otherwise, it only converts the text
- * into the internet message encoding.
- * @return the encoded/converted header. The caller needs to free this by
- * calling XP_FREE when the result is no longer needed.
- * @see INTL_DecodeMimePartIIStr
- * @see INTL_EncodeMimePartIIStr_VarLen
- */
- PUBLIC char *INTL_EncodeMimePartIIStr(
- char *header,
- int16 wincsid,
- XP_Bool bUseMime
- );
-
- /**
- * Convert and encode text into RFC 1522 header.
- *
- * Converts the string into the encoding used in internet messages and encodes
- * it into RFC 1522 form. It will (1) perform the codeset conversion and
- * (2) apply the RFC 1522 encoding algorithm (if bUseMime is true or the
- * internet message encoding is ISO-2022-KR or ISO-2022-JP). It is the same as
- * INTL_EncodeMimePartIIStr except that it allows an encodedWordSize value
- * other than 72.
- *
- * @param header Specifies the string to be converted and encoded.
- * @param wincsid Specifies the source encoding
- * @param bUseMime Specifies whether to apply the RFC 1522 rule or not.
- * If the value is true or the internet message encoding
- * is ISO-2022-JP or ISO-2022-KR, then it performs
- * RFC 1522 encoding after converting the text into the
- * internet message encoding. Otherwise, it only converts
- * the text into the internet message encoding.
- * @param encodedWordSize Specifies the maximum length of an encoded word.
- * @return the encoded/converted header. The caller needs to free this by
- * calling XP_FREE when the result is no longer needed.
- * @see INTL_DecodeMimePartIIStr
- * @see INTL_EncodeMimePartIIStr
- */
- PUBLIC char *INTL_EncodeMimePartIIStr_VarLen(
- char * header,
- int16 wincsid,
- XP_Bool bUseMime,
- int encodedWordSize
- );
-
- /**
- * [OBSOLETE!!!] We should use the INTL_DecodeMimePartIIStr instead of this.
- * We keep this Macro until we change all the callers.
- * Please do not use this in the future.
- */
- #define IntlDecodeMimePartIIStr INTL_DecodeMimePartIIStr
-
- /**
- * [OBSOLETE!!!] We should use the INTL_EncodeMimePartIIStr instead of this.
- * We keep this Macro until we change all the callers.
- * Please do not use this in the future.
- */
- #define IntlEncodeMimePartIIStr INTL_EncodeMimePartIIStr
-
-
-
- /**
- * Set a private flag that remembers whether we are sending mail or news.
- *
- * A flag is used inside libi18n to remember whether we are sending mail or
- * news. This is because the mail encoding and the news encoding are
- * different for Korean.
- * Note that this should be used carefully since it depends on
- * the current mail/news implementation.
- * This is really a hack. It will be removed in the future.
- *
- * @param toNews Boolean value to be stored in the private flag.
- */
- PUBLIC void
- INTL_MessageSendToNews(XP_Bool toNews);
-
-
- /**
- * Convert a string from an RFC 1522 encoded header and normalize it, by
- * dropping the case of the characters.
- *
- * The return value can be used with INTL_StrContains, INTL_StrIs,
- * INTL_StrBeginWith or INTL_StrEndWith to perform string matching. This
- * function will normalize a string by dropping the case of characters
- * according to the csid the caller passed in. It will also ignore CR and LF
- * characters.
- *
- * @param csid Specifies the encoding of rfc1522header
- * @param rfc1522header Specifies the to-be-normalized string.
- * @return a normalized string which can be used in INTL_StrContains,
- * INTL_StrIs, INTL_StrBeginWith and INTL_StrEndWith. The caller
- * should free it by calling XP_FREE when it is not needed.
- * @see INTL_GetNormalizeStr
- * @see INTL_StrContains
- * @see INTL_StrIs
- * @see INTL_StrBeginWith
- * @see INTL_StrEndWith
- */
- PUBLIC unsigned char* INTL_GetNormalizeStrFromRFC1522(
- int16 csid,
- unsigned char* rfc1522header
- );
-
-
- /*@}*/
- /*=======================================================*/
- /**@name Unicode (UCS-2) Strings */
- /*@{*/
-
- /**
- * Unicode character typedef.
- *
- * This is used to represent a 16-bit Unicode (UCS-2) character.
- */
- typedef uint16 INTL_Unicode;
-
- /**
- * Return the length of a Unicode string.
- *
- * The given Unicode string must be terminated by U+0000.
- *
- * @param ustr Specifies the Unicode string
- * @return The length of ustr in UCS-2 units, not bytes
- */
- PUBLIC uint32 INTL_UnicodeLen(INTL_Unicode *ustr);
-
- /*@}*/
- /*=======================================================*/
- /**@name Compound Strings */
- /*@{*/
-
- /**
- * A typedef for encoding IDs (charset IDs).
- *
- * These are equivalent to charset IDs in the current code base.
- */
- typedef uint16 INTL_Encoding_ID;
-
- /*
- * See comment below.
- */
- typedef struct INTL_CompoundStr INTL_CompoundStr;
-
- /**
- * Compound String.
- *
- * A Compound String is constructed as a linked list. Each node has two fields
- * and a pointer to the next node. The two fields store a pointer to a
- * uniformly encoded piece of text and the encoding of that text.
- */
- struct INTL_CompoundStr {
- /** The encoding of the text in this node. */
- INTL_Encoding_ID encoding;
- /** The uniformly encoded text. */
- unsigned char *text;
- /** A pointer to the next node. NULL if there are no more nodes. */
- INTL_CompoundStr *next;
- };
-
- /**
- * INTL_CompoundStrIterator should really be opaque, but we need to change the
- * callers first.
- */
- typedef INTL_CompoundStr *INTL_CompoundStrIterator;
-
- /**
- * Construct an INTL_CompoundStr, given some text and its encoding.
- *
- * Use this with INTL_CompoundStrCat to create multi-encoding
- * INTL_CompoundStrs.
- *
- * @param inencoding Specifies the encoding of intext.
- * @param intext Specifies the text to be stored. Null-terminated string.
- * @return INTL_CompoundStr. The caller should use INTL_CompoundStrDestroy to
- * destroy it when it is no longer needed.
- * @see INTL_CompoundStrDestroy
- */
- PUBLIC INTL_CompoundStr* INTL_CompoundStrFromStr(
- INTL_Encoding_ID inencoding,
- unsigned char* intext
- );
-
- /**
- * Convert the given Unicode string to an INTL_CompoundStr.
- *
- * This routine uses information provided by the front end through
- * INTL_SetUnicodeCSIDList. It converts from Unicode to substrings in the
- * encodings that the front end said were available (in the font system).
- *
- * @param inunicode Specifies the Unicode text to be converted.
- * @param inlen Specifies the length of inunicode in UCS-2 units,
- * not bytes.
- * @return INTL_CompoundStr. The caller should use INTL_CompoundStrDestroy to
- * destroy it when it is no longer needed.
- * @see INTL_CompoundStrDestroy
- */
- PUBLIC INTL_CompoundStr* INTL_CompoundStrFromUnicode(
- INTL_Unicode* inunicode,
- uint32 inlen
- );
-
- /**
- * Destroy an INTL_CompoundStr.
- *
- * This function destroys the INTL_CompoundStr created by
- * INTL_CompoundStrFromStr or INTL_CompoundStrFromUnicode.
- *
- * @param This Specifies the INTL_CompoundStr to be destroyed.
- * @see INTL_CompoundStrFromStr
- * @see INTL_CompoundStrFromUnicode
- */
- PUBLIC void INTL_CompoundStrDestroy(INTL_CompoundStr* This);
-
- /**
- * Concatenate two INTL_CompoundStrs.
- *
- * @param s1 Specifies the first INTL_CompoundStr and returns the
- * concatenated INTL_CompoundStr
- * @param s2 Specifies the second INTL_CompoundStr
- * @see INTL_CompoundStrDestroy
- */
- PUBLIC void INTL_CompoundStrCat(
- INTL_CompoundStr* s1,
- INTL_CompoundStr* s2
- );
-
- /**
- * Clone an INTL_CompoundStr.
- *
- * This function clones an INTL_CompoundStr.
- *
- * @param s1 Specifies the INTL_CompoundStr to be cloned
- * @return a cloned INTL_CompoundStr. The caller should use
- * INTL_CompoundStrDestroy to destroy it when it is no longer needed.
- * @see INTL_CompoundStrDestroy
- */
- PUBLIC INTL_CompoundStr* INTL_CompoundStrClone(INTL_CompoundStr* s1);
-
- /**
- * Start iterating an INTL_CompoundStr.
- *
- * Initialize the iterating state and perform the first iteration of an
- * INTL_CompoundStr.
- *
- * @param This Specifies the INTL_CompoundStr to be iterated
- * @param outencoding Returns the encoding of the first node
- * @param outtext Returns the text of the first node. The caller should
- * not free it.
- * @return INTL_CompoundStrIterator. The state of the iteration. Should be
- * passed to INTL_CompoundStrNextStr. NULL if the iteration is
- * finished.
- * @see INTL_CompoundStrNextStr
- */
- PUBLIC INTL_CompoundStrIterator INTL_CompoundStrFirstStr(
- INTL_CompoundStr* This,
- INTL_Encoding_ID *outencoding,
- unsigned char** outtext
- );
-
- /**
- * Iterating INTL_CompoundStr.
- *
- * This function iterates through the INTL_CompoundStr for the given
- * INTL_CompoundStrIterator.
- *
- * @param iterator Specifies the INTL_CompoundStrIterator
- * @param outencoding Returns the encoding of the current node
- * @param outtext Returns the text of the current node. The caller should
- * not free it.
- * @return INTL_CompoundStrIterator. The state of the iteration. Should be
- * passed to INTL_CompoundStrNextStr. NULL if the iteration is
- * finished.
- * @see INTL_CompoundStrFirstStr
- */
- PUBLIC INTL_CompoundStrIterator INTL_CompoundStrNextStr(
- INTL_CompoundStrIterator iterator,
- INTL_Encoding_ID *outencoding,
- unsigned char** outtext
- );
-
- /*@}*/
- /*=======================================================*/
- /**@name Unicode Conversion */
- /*@{*/
- /**
- * An opaque data object used to iterate through Unicode text for
- * conversion to font encodings.
- *
- * See also the functions that use this object.
- *
- * @see INTL_UnicodeToStrIteratorCreate
- * @see INTL_UnicodeToStrIterate
- * @see INTL_UnicodeToStrIteratorDestroy
- *
- */
- typedef void* INTL_UnicodeToStrIterator ;
-
- /**
- * Create an INTL_UnicodeToStrIterator and iterate through it once.
- *
- * This function creates an INTL_UnicodeToStrIterator and iterates through it
- * once to get the first element of Unicode text for font encoding conversion.
- * The function uses the prioritized Character Set ID list (CSIDList) to
- * decide which font encoding it will convert to. The iteration stops if the
- * whole Unicode string is converted. Otherwise, it continues iterating and
- * uses the next charset in the CSIDlist to convert the Unicode text.
- *
- * @param ustr Specifies Unicode string to be converted
- * @param ustrlen Specifies length of ustr in UCS-2 units not bytes
- * @param encoding Returns the encoding of the first element.
- * Returns 0 if there are no more to iterate.
- * @param dest Specifies the buffer for output and returns the
- * converted string for the first iteration
- * @param destbuflen Specifies the length of dest in bytes
- * @return Iterator which keeps the iteration state
- * @see INTL_GetUnicodeCSIDList
- * @see INTL_SetUnicodeCSIDList
- * @see INTL_UnicodeToStrIterate
- * @see INTL_UnicodeToStrIteratorDestroy
- * @see INTL_GetUnicodeCharsetList
- */
- PUBLIC INTL_UnicodeToStrIterator INTL_UnicodeToStrIteratorCreate(
- INTL_Unicode* ustr,
- uint32 ustrlen,
- INTL_Encoding_ID *encoding,
- unsigned char* dest,
- uint32 destbuflen
- );
-
- /**
- * Iterate through a Unicode object and convert to font encoding.
- *
- * Iterate the INTL_UnicodeToStrIterator to get the next piece of the
- * Unicode to font encoding conversion.
- *
- * @param iterator Specifies iterator that keeps the last iteration state
- * @param encoding Returns the encoding of the element converted in the
- * current iteration. Returns 0 if there are no more to
- * iterate.
- * @param dest Specifies the buffer for output and returns the
- * converted string for the current iteration
- * @param destbuflen Specifies the length of dest in bytes
- * @return 0 if there are no more elements to iterate.
- * @see INTL_GetUnicodeCSIDList
- * @see INTL_SetUnicodeCSIDList
- * @see INTL_UnicodeToStrIteratorCreate
- * @see INTL_UnicodeToStrIteratorDestroy
- * @see INTL_GetUnicodeCharsetList
- */
- PUBLIC int INTL_UnicodeToStrIterate(
- INTL_UnicodeToStrIterator iterator,
- INTL_Encoding_ID *encoding,
- unsigned char* dest,
- uint32 destbuflen
- );
-
- /**
- * Destroy an INTL_UnicodeToStrIterator.
- *
- * This function destroys the INTL_UnicodeToStrIterator created by
- * INTL_UnicodeToStrIteratorCreate.
- *
- * @param iterator Specifies the iterator to be destroyed
- * @see INTL_GetUnicodeCSIDList
- * @see INTL_SetUnicodeCSIDList
- * @see INTL_UnicodeToStrIteratorCreate
- * @see INTL_UnicodeToStrIterate
- * @see INTL_GetUnicodeCharsetList
- */
- PUBLIC void INTL_UnicodeToStrIteratorDestroy(
- INTL_UnicodeToStrIterator iterator
- );
-
- /**
- * Return memory requirement for INTL_UnicodeToStr.
- *
- * Returns the maximum memory required for text converted from a Unicode
- * string to a specified encoding. Call this to prepare memory for
- * INTL_UnicodeToStr.
- *
- * @param encoding Specifies the target encoding
- * @param ustr Specifies the buffer containing UCS-2 data
- * @param ustrlen Specifies the valid length of ustr in UCS-2 units
- * not bytes
- * @return Number of bytes needed to store the converted result
- * @see INTL_UnicodeToStr
- */
- PUBLIC uint32 INTL_UnicodeToStrLen(
- INTL_Encoding_ID encoding,
- INTL_Unicode* ustr,
- uint32 ustrlen
- );
-
- /**
- * Convert Unicode string to a specified encoding.
- *
- * The caller needs to call INTL_UnicodeToStrLen first to prepare memory and
- * pass into dest.
- *
- * @param encoding Specifies the target encoding
- * @param ustr Specifies the buffer containing UCS-2 data
- * @param ustrlen Specifies the valid length of ustr in UCS-2 units
- * not bytes
- * @param dest Specifies the buffer for the converted text and
- * returns the converted text
- * @param destbuflen Specifies the size of dest in bytes
- * @see INTL_UnicodeToStrLen
- */
- PUBLIC void INTL_UnicodeToStr(
- INTL_Encoding_ID encoding,
- INTL_Unicode* ustr,
- uint32 ustrlen,
- unsigned char* dest,
- uint32 destbuflen
- );
-
- /**
- * Convert Unicode to text in one encoding by trial and error.
- *
- * This routine tries to convert the given Unicode string into text of one
- * non-Unicode encoding. This is a trial and error function which may be
- * slow in "THE WORST CASE". However, it does its best in the best case and
- * average case.
- *
- * @param ustr Specifies the buffer containing UCS-2 data
- * @param ustrlen Specifies the valid length of ustr in UCS-2 units
- * not bytes
- * @param dest Specifies the buffer for the converted text and
- * returns the converted text
- * @param destbuflen Specifies the size of dest in bytes
- * @return Encoding of the converted text
- */
- PUBLIC INTL_Encoding_ID INTL_UnicodeToEncodingStr(
- INTL_Unicode* ustr,
- uint32 ustrlen,
- unsigned char* dest,
- uint32 destbuflen
- );
-
- /**
- * Return memory requirement for INTL_StrToUnicode.
- *
- * Return the maximum memory requirement for text converted from the
- * specified encoding to Unicode. Call this to prepare memory for
- * INTL_StrToUnicode. The difference between this function and
- * INTL_TextToUnicodeLen is that the input is specified by a NULL
- * terminated string.
- *
- * @param encoding Specifies the encoding of text in src
- * @param src Specifies the text to be converted
- * @return Size of Unicode to store the converted output (in
- * UCS-2 units not bytes)
- * @see INTL_StrToUnicode
- * @see INTL_TextToUnicodeLen
- */
- PUBLIC uint32 INTL_StrToUnicodeLen(
- INTL_Encoding_ID encoding,
- unsigned char* src
- );
-
- /**
- * Convert non-Unicode text to Unicode.
- *
- * The caller needs to call INTL_StrToUnicodeLen first to prepare memory and
- * pass it in as ustr. The difference between this function and
- * INTL_TextToUnicode is that the input is specified by a NULL terminated
- * string.
- *
- * @param encoding Specifies the encoding of text in src
- * @param src Specifies the text to be converted
- * @param ustr Specifies the buffer for Unicode and returns the converted
- * Unicode
- * @param ubuflen Specifies the size of the ustr in UCS-2 units not bytes
- * @return Size of the converted Unicode (in UCS-2 units not bytes)
- * @see INTL_StrToUnicodeLen
- * @see INTL_TextToUnicode
- */
- PUBLIC uint32 INTL_StrToUnicode(
- INTL_Encoding_ID encoding,
- unsigned char* src,
- INTL_Unicode* ustr,
- uint32 ubuflen
- );
-
- /**
- * Return memory requirement for INTL_TextToUnicode.
- *
- * Return the maximum memory requirement for text converted from a specified
- * encoding to Unicode. Call this to prepare memory for INTL_TextToUnicode.
- * The difference between this function and INTL_StrToUnicodeLen is that the
- * input is not specified by a NULL terminated string, but by a pointer and
- * a length.
- *
- * @param encoding Specifies the encoding of text in src
- * @param src Specifies the text to be converted
- * @param srclen Specifies the number of bytes in src
- * @return Size of Unicode to store the converted output (in UCS-2
- * units not bytes)
- * @see INTL_TextToUnicode
- * @see INTL_StrToUnicodeLen
- */
- PUBLIC uint32 INTL_TextToUnicodeLen(
- INTL_Encoding_ID encoding,
- unsigned char* src,
- uint32 srclen
- );
-
- /**
- * Convert text from non-Unicode to Unicode.
- *
- * The caller needs to call INTL_TextToUnicodeLen first to prepare memory and
- * pass it in as ustr. The difference between this function and
- * INTL_StrToUnicode is that the input is not specified by a NULL terminated
- * string, but by a pointer and a length.
- *
- * @param encoding Specifies the encoding of text in src
- * @param src Specifies the text to be converted
- * @param srclen Specifies the number of bytes in src
- * @param ustr Specifies the buffer for the Unicode string and returns
- * the converted Unicode string
- * @param ubuflen Specifies the size of the ustr in the UCS-2 units not
- * bytes
- * @return Size of converted Unicode (in UCS-2 units not bytes)
- * @see INTL_TextToUnicodeLen
- * @see INTL_StrToUnicode
- */
- PUBLIC uint32 INTL_TextToUnicode(
- INTL_Encoding_ID encoding,
- unsigned char* src,
- uint32 srclen,
- INTL_Unicode* ustr,
- uint32 ubuflen
- );
-
-
- /**
- * Initialize the Unicode conversion routines from a list of Character Set
- * IDs (CSIDs) for Unicode rendering.
- *
- * It should only be called once in the application life time. It should be
- * called by the front end before calling any other Unicode conversion
- * functions. The list can be retrieved through INTL_GetUnicodeCSIDList or
- * INTL_GetUnicodeCharsetList.
- *
- * @param numOfItems Specifies the number of valid entries in csidlist
- * @param csidlist Specifies a prioritized list of csids to be used for
- * Unicode to font charset conversion. The function will
- * make a copy of the list the caller passes in. The caller
- * can free the passed-in list after this function returns.
- * @see INTL_GetUnicodeCSIDList
- * @see INTL_UnicodeToStrIteratorCreate
- * @see INTL_UnicodeToStrIterate
- * @see INTL_UnicodeToStrIteratorDestroy
- * @see INTL_GetUnicodeCharsetList
- */
- PUBLIC void INTL_SetUnicodeCSIDList(
- uint16 numOfItems,
- int16 *csidlist);
-
- /**
- * Returns a list of Character Set IDs (CSIDs) used for converting Unicode
- * to font encoding.
- *
- * The list is set at initialization time by the front end through
- * INTL_SetUnicodeCSIDList. The only difference between INTL_GetUnicodeCSIDList
- * and INTL_GetUnicodeCharsetList is that INTL_GetUnicodeCSIDList returns a
- * list of CSIDs and INTL_GetUnicodeCharsetList returns a list of charset
- * names (strings).
- *
- * @param outnum Returns the number of items in the returned CSID array.
- * @return Array of CSIDs. Caller should not change or free the returned
- * array.
- * @see INTL_SetUnicodeCSIDList
- * @see INTL_UnicodeToStrIteratorCreate
- * @see INTL_UnicodeToStrIterate
- * @see INTL_UnicodeToStrIteratorDestroy
- * @see INTL_GetUnicodeCharsetList
- */
- PUBLIC int16* INTL_GetUnicodeCSIDList(int16 * outnum);
-
- /**
- * Return a list of charset names (strings) used for converting Unicode to font
- * encoding.
- *
- * The list is set in the initialization time by front end through
- * INTL_SetUnicodeCSIDList. The only difference between INTL_GetUnicodeCSIDList
- * and INTL_GetUnicodeCharsetList is that INTL_GetUnicodeCSIDList returns a
- * list of CSIDs and INTL_GetUnicodeCharsetList returns a list of charset
- * names (strings).
- *
- * @param outnum Returns the number of items in the returned charset array
- * @return Array of charset names. Caller should not change or free the
- * returned array.
- * @see INTL_GetUnicodeCSIDList
- * @see INTL_SetUnicodeCSIDList
- * @see INTL_UnicodeToStrIteratorCreate
- * @see INTL_UnicodeToStrIterate
- * @see INTL_UnicodeToStrIteratorDestroy
- */
- PUBLIC unsigned char **INTL_GetUnicodeCharsetList(int16 * outnum);
-
- /**
- * Converts a UTF-8 sub-string to the appropriate font encoding.
- *
- * Converts characters until the encoding changes or
- * input/output space runs out.
- *
- * The segment is NOT NULL TERMINATED
- *
- * @param utf8p Specifies the UTF-8 string
- * @param utf8len Specifies the length of utf8p
- * @param LE_string Specifies and returns the (pre-allocated) buffer
- * for the string converted to the font encoding
- * @param LE_string_len Specifies the length of the buffer for LE_string
- * @param LE_written_len Returns the valid length of the return LE_string
- * @param LE_string_csid Returns the CSID of the return LE_string:
- * <UL>
- * <LI>
- * >0 if successful (valid CSID).
- * <LI>
- * -1 if not Unicode.
- * <LI>
- * -2 if no font encoding.
- * </UL>
- * @return Length of converted UTF-8 string
- */
- PUBLIC int utf8_to_local_encoding(
- const unsigned char *utf8p,
- const int utf8len,
- unsigned char *LE_string,
- int LE_string_len,
- int *LE_written_len,
- int16 *LE_string_csid
- );
-
- /**
- * Convert text from UTF-8 to UCS-2 encoding.
- *
- * UCS-2 is the abbreviation for the two byte form of Unicode.
- * UTF-8 is a transformation encoding for Unicode.
- * For more information about UTF-8 look at RFC 2279 in
- * <A HREF=ftp://ds.internic.net/rfc/rfc2279.txt>
- * ftp://ds.internic.net/rfc/rfc2279.txt</A> .
- * For more information about UCS-2, look at <A HREF=http://www.unicode.org>
- * http://www.unicode.org</A>.
- *
- * @param utf8p Specifies the UTF-8 text buffer. It is NULL terminated.
- * @param num_chars Returns the length of the converted UCS-2 in UCS-2 units
- * not bytes
- * @return UCS-2 string, NULL terminated by U+0000, or NULL. The
- * caller should free it by calling XP_FREE when it is no
- * longer needed.
- * @see INTL_UCS2ToUTF8
- */
- PUBLIC UNICVTAPI uint16 *INTL_UTF8ToUCS2(
- const unsigned char *utf8p,
- int32 *num_chars
- );
-
- /**
- * Convert text from UCS-2 to UTF-8 encoding.
- *
- * UCS-2 is the abbreviation for the two-byte form of Unicode.
- * UTF-8 is a transformation encoding for Unicode.
- * For more information about UTF-8, see RFC 2279 at
- * <A HREF="ftp://ds.internic.net/rfc/rfc2279.txt">
- * ftp://ds.internic.net/rfc/rfc2279.txt</A>.
- * For more information about UCS-2, see
- * <A HREF="http://www.unicode.org">http://www.unicode.org</A>.
- *
- * @param ucs2p Specifies the UCS-2 text buffer
- * @param num_chars Specifies the length of ucs2p, counted in UCS-2 units,
- * not bytes
- * @return NULL terminated UTF-8 string, or NULL. The caller should
- * free it by calling XP_FREE when it is no longer needed.
- * @see INTL_UTF8ToUCS2
- */
- PUBLIC UNICVTAPI unsigned char *INTL_UCS2ToUTF8(
- const uint16 *ucs2p,
- int32 num_chars
- );
- /*@}*/
- /*=======================================================*/
- /**@name String Comparison */
- /*@{*/
-
- /**
- * Case insensitive comparison.
- *
- * This function is multibyte charset safe: it handles character
- * boundaries correctly. It also ignores case, taking the given charset
- * into account.
- *
- * @param charSetID Specifies the encoding of text1 and text2.
- * @param text1 Specifies the address of text1.
- * @param text2 Specifies the address of text2.
- * @param charlen Returns the length in bytes of text1.
- * @return true if text1 and text2 point to the same character,
- * ignoring case, false otherwise.
- * @see INTL_MatchOneCaseChar
- * @see INTL_Strstr
- * @see INTL_Strcasestr
- */
- PUBLIC XP_Bool INTL_MatchOneChar(
- int16 charSetID,
- unsigned char *text1,
- unsigned char *text2,
- int *charlen
- );
-
- /**
- * Case sensitive comparison.
- *
- * This function is multibyte charset safe: it handles character
- * boundaries correctly.
- *
- * @param charSetID Specifies the encoding of text1 and text2.
- * @param text1 Specifies the address of text1.
- * @param text2 Specifies the address of text2.
- * @param charlen Returns the length in bytes of text1.
- * @return true if text1 and text2 point to the same character (same case),
- * false otherwise.
- * @see INTL_MatchOneChar
- * @see INTL_Strstr
- * @see INTL_Strcasestr
- */
- PUBLIC XP_Bool INTL_MatchOneCaseChar(
- int16 charSetID,
- unsigned char *text1,
- unsigned char *text2,
- int *charlen
- );
-
- /**
- * Case sensitive sub-string search.
- *
- * This function is multibyte charset safe: it handles character
- * boundaries correctly.
- *
- * @param charSetID Specifies the encoding of s1 and s2.
- * @param s1 Specifies the string to search in
- * @param s2 Specifies the sub-string to search for
- * @return NULL if s1 does not contain s2,
- * otherwise the address of the sub-string in s1.
- * @see INTL_MatchOneChar
- * @see INTL_MatchOneCaseChar
- * @see INTL_Strcasestr
- */
- PUBLIC char *INTL_Strstr(
- int16 charSetID,
- const char *s1,
- const char *s2
- );
-
- /**
- * Case insensitive sub-string search.
- *
- * This function is multibyte charset safe: it handles character
- * boundaries correctly. It also ignores case, taking the given charset
- * into account.
- *
- * @param charSetID Specifies the encoding of s1 and s2.
- * @param s1 Specifies the string to search in
- * @param s2 Specifies the sub-string to search for
- * @return NULL if s1 does not contain s2,
- * otherwise the address of the sub-string in s1.
- * @see INTL_MatchOneChar
- * @see INTL_MatchOneCaseChar
- * @see INTL_Strstr
- */
- PUBLIC char *INTL_Strcasestr(
- int16 charSetID,
- const char *s1,
- const char *s2
- );
-
-
- /*
- Functions that support correct mail/news string comparison:
- INTL_GetNormalizeStr
- INTL_GetNormalizeStrFromRFC1522
- INTL_StrContains
- INTL_StrIs
- INTL_StrBeginWith
- INTL_StrEndWith
-
- Example:
-
- XP_Bool MailHeaderContains(csid, header, str)
- {
- XP_Bool result = FALSE;
- unsigned char* n_str = INTL_GetNormalizeStr(csid, str);
- unsigned char* n_header = INTL_GetNormalizeStrFromRFC1522(csid, header);
-
- if((NULL != n_str) && (NULL != n_header))
- result = INTL_StrContains(csid, n_header, n_str);
- if(n_str)
- XP_FREE(n_str);
- if(n_header)
- XP_FREE(n_header);
- return result;
- }
-
- */
-
- /**
- * Normalize a string by dropping the case of its characters.
- *
- * The return value can be used with INTL_StrContains, INTL_StrIs,
- * INTL_StrBeginWith or INTL_StrEndWith to perform string matching. This
- * function normalizes a string by dropping the case of each character
- * according to the charSetID the caller passed in. It also ignores CR and
- * LF characters.
- *
- * @param charSetID Specifies the encoding of str
- * @param str Specifies the string to be normalized.
- * @return a normalized string which can be used with INTL_StrContains,
- * INTL_StrIs, INTL_StrBeginWith and INTL_StrEndWith. The caller
- * should free it by calling XP_FREE when it is no longer needed.
- * @see INTL_GetNormalizeStrFromRFC1522
- * @see INTL_StrContains
- * @see INTL_StrIs
- * @see INTL_StrBeginWith
- * @see INTL_StrEndWith
- */
- PUBLIC unsigned char* INTL_GetNormalizeStr(
- int16 charSetID,
- unsigned char* str
- );
-
- /**
- * Test if string str1 contains string str2.
- *
- * This function is multibyte charset safe: it handles character
- * boundaries correctly. To match while ignoring the case of characters,
- * call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522) on the
- * strings before calling this function.
- *
- * @param charSetID Specifies the encoding of str1 and str2.
- * @param str1 Specifies the first string
- * @param str2 Specifies the second string
- * @return true if str1 contains str2,
- * false otherwise
- * @see INTL_GetNormalizeStr
- * @see INTL_GetNormalizeStrFromRFC1522
- * @see INTL_StrIs
- * @see INTL_StrBeginWith
- * @see INTL_StrEndWith
- */
- PUBLIC XP_Bool INTL_StrContains(
- int16 charSetID,
- unsigned char* str1,
- unsigned char* str2
- );
-
- /**
- * Test if string str1 is equal to string str2.
- *
- * This function is multibyte charset safe: it handles character
- * boundaries correctly. To match while ignoring the case of characters,
- * call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522) on the
- * strings before calling this function.
- *
- * @param charSetID Specifies the encoding of str1 and str2.
- * @param str1 Specifies the first string
- * @param str2 Specifies the second string
- * @return true if the two strings are equal, false otherwise
- * @see INTL_GetNormalizeStr
- * @see INTL_GetNormalizeStrFromRFC1522
- * @see INTL_StrContains
- * @see INTL_StrBeginWith
- * @see INTL_StrEndWith
- */
- PUBLIC XP_Bool INTL_StrIs(
- int16 charSetID,
- unsigned char* str1,
- unsigned char* str2
- );
-
- /**
- * Test if string str1 begins with string str2.
- *
- * This function is multibyte charset safe: it handles character
- * boundaries correctly. To match while ignoring the case of characters,
- * call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522) on the
- * strings before calling this function.
- *
- * @param charSetID Specifies the encoding of str1 and str2.
- * @param str1 Specifies the first string
- * @param str2 Specifies the second string
- * @return true if the first string begins with the second string,
- * false otherwise
- * @see INTL_GetNormalizeStr
- * @see INTL_GetNormalizeStrFromRFC1522
- * @see INTL_StrContains
- * @see INTL_StrIs
- * @see INTL_StrEndWith
- */
- PUBLIC XP_Bool INTL_StrBeginWith(
- int16 charSetID,
- unsigned char* str1,
- unsigned char* str2
- );
-
- /**
- * Test if string str1 ends with string str2.
- *
- * This function is multibyte charset safe: it handles character
- * boundaries correctly. To match while ignoring the case of characters,
- * call INTL_GetNormalizeStr (or INTL_GetNormalizeStrFromRFC1522) on the
- * strings before calling this function.
- *
- * @param charSetID Specifies the encoding of str1 and str2.
- * @param str1 Specifies the first string
- * @param str2 Specifies the second string
- * @return true if the first string ends with the second string, false
- * otherwise.
- * @see INTL_GetNormalizeStr
- * @see INTL_GetNormalizeStrFromRFC1522
- * @see INTL_StrContains
- * @see INTL_StrIs
- * @see INTL_StrBeginWith
- */
- PUBLIC XP_Bool INTL_StrEndWith(
- int16 charSetID,
- unsigned char* str1,
- unsigned char* str2
- );
-
- /**
- * Return a (hacky) XPAT pattern for an NNTP server, for searching pre
- * RFC 1522 message headers.
- *
- * This is a hacky function which tries to work around another HACK!!! The
- * problem it tries to solve is searching on an NNTP (internet newsgroup)
- * server. Unfortunately, NNTP has no command for searching non-ASCII text.
- * The only functionality in the NNTP protocol we can use is the XPAT
- * extension of NNTP (see
- * <A HREF="ftp://ds.internic.net/internet-drafts/draft-ietf-nntpext-imp-01.txt">
- * ftp://ds.internic.net/internet-drafts/draft-ietf-nntpext-imp-01.txt</A> or
- * <A HREF="ftp://ds.internic.net/internet-drafts/draft-barber-nntp-imp-07.txt">
- * ftp://ds.internic.net/internet-drafts/draft-barber-nntp-imp-07.txt</A> ).
- * XPAT uses wildmat regular expressions (see <A HREF=
- * "http://oac.hsc.uth.tmc.edu/oac_sysadmin/services/INN/man/wildmat.3.html">
- * http://oac.hsc.uth.tmc.edu/oac_sysadmin/services/INN/man/wildmat.3.html</A>
- * for details) to provide string matching. Unfortunately, wildmat is not
- * designed to support non-ASCII text. It works for English headers but not
- * for headers in other languages such as Japanese, French, or German. The
- * problem is that XPAT/wildmat can deal with neither (1) ISO-2022-xx
- * encodings nor (2) RFC 1522 headers.
- *
- * To work around this limitation in the protocol, this function addresses
- * the first problem as far as it can. It takes one search string and
- * returns an XPAT pattern which can then be sent to the NNTP XPAT command
- * as the search argument.
- *
- * However, there are some limitations:
- * (1) It may cause the NNTP server to return more messages than it should,
- * because XPAT does not respect multibyte character boundaries when it
- * tries to match the string. To improve this in the future, the client
- * should double check the headers after it receives the messages from
- * the server and narrow the result down to the correct matches.
- * (2) The generated pattern will not match RFC 1522 headers, so the server
- * may return fewer messages than it should. This is because more than
- * one XPAT pattern could match the string in the case of an RFC 1522
- * header. To improve this in the future, the client side should send
- * several possible XPAT patterns (together with the pattern returned by
- * this function), collect the results, and then double check them on
- * the client side.
- * Of course, improving the NNTP protocol itself is the real solution. But
- * the improvements stated above are also needed for servers that support
- * the current NNTP protocol.
- *
- * This function (1) converts the text from the encoding specified by the
- * argument into the encoding used in the corresponding internet newsgroup,
- * (2) strips out a leading or trailing ISO-2022 escape sequence if present,
- * (3) escapes the wildmat special characters (any character which is not
- * in 0-9, a-z, A-Z), and returns the result.
- *
- * @param winCharSetID Specifies the encoding of searchString.
- * @param searchString Specifies the string to be searched for through the
- * NNTP XPAT command.
- * @return the pattern that should be sent to the NNTP XPAT command for
- * searching non-ASCII headers. The caller needs to free this by
- * calling XP_FREE when the result is no longer needed.
- */
- PUBLIC unsigned char* INTL_FormatNNTPXPATInNonRFC1522Format(
- int16 winCharSetID,
- unsigned char* searchString
- );
-
- /*@}*/
- /*=======================================================*/
- /**@name Charset ID Iterator */
- /*@{*/
-
-
- /**
- * An opaque object that can iterate through a list of charset IDs.
- *
- * @see INTL_CSIDIteratorCreate
- * @see INTL_CSIDIteratorDestroy
- * @see INTL_CSIDIteratorNext
- */
- typedef void* INTL_CSIDIterator;
-
- /**
- * Returns a new iterator object to search charset IDs for a particular
- * conversion.
- *
- * This function searches a built-in table to look for charset converters
- * that could be used for a particular purpose. The only purpose currently
- * supported is the IMAP4 conversion. This function puts the mail and news
- * charset IDs corresponding to the given charset ID at the top of the list
- * of IDs to try. After that, it inserts the "to" charset IDs of all entries
- * matching the given "from" ID.
- *
- * @param iterator Returns a new iterator object
- * @param charSetID Specifies the charset ID to convert from
- * @param flag Specifies the type of conversion.
- * Currently, the only valid value is
- * csiditerate_TryIMAP4Search .
- *
- * @see INTL_CSIDIteratorNext
- * @see INTL_CSIDIteratorDestroy
- */
- PUBLIC void INTL_CSIDIteratorCreate(
- INTL_CSIDIterator *iterator,
- int16 charSetID,
- int flag
- );
-
- /**
- * Frees the given iterator, and sets the given pointer to NULL.
- *
- * This function destroys the object created by INTL_CSIDIteratorCreate.
- *
- * @param iterator Specifies the address of the iterator object to destroy
- * @see INTL_CSIDIteratorCreate
- */
- PUBLIC void INTL_CSIDIteratorDestroy(
- INTL_CSIDIterator *iterator
- );
-
- /**
- * Returns the next charset ID in the given iterator, if any.
- *
- * The return value is TRUE if a charset ID was found. The charset ID
- * is returned in pCharSetID. Otherwise, the return value is FALSE, and
- * pCharSetID remains untouched.
- *
- * @param iterator Specifies the iterator object
- * @param pCharSetID Returns the next charset ID
- * @return TRUE if a charset ID was found and returned in pCharSetID,
- * otherwise FALSE
- * @see INTL_CSIDIteratorCreate
- * @see INTL_CSIDIteratorDestroy
- */
- PUBLIC XP_Bool INTL_CSIDIteratorNext(
- INTL_CSIDIterator *iterator,
- int16 *pCharSetID
- );
-
- /*@}*/
- /*=======================================================*/
- /**@name Line/Word Breaking */
- /*@{*/
-
- /**
- * Line breaking information.
- *
- * <UL>
- * <LI>
- * PROHIBIT_NOWHERE -
- * A breakable character. The line may be broken before
- * or after this character. This class is for all
- * Kanji ideographic characters.
- * <LI>
- * PROHIBIT_BEGIN_OF_LINE -
- * The character should not appear at the beginning of a line.
- * <LI>
- * PROHIBIT_END_OF_LINE -
- * The character should not appear at the end of a line.
- * <LI>
- * PROHIBIT_WORD_BREAK -
- * A non-breakable character. The line cannot be broken
- * if the next (or previous) character is also
- * PROHIBIT_WORD_BREAK.
- * </UL>
- *
- * @see INTL_KinsokuClass
- */
- enum LINE_WRAP_PROHIBIT_CLASS{
- PROHIBIT_NOWHERE,
- PROHIBIT_BEGIN_OF_LINE,
- PROHIBIT_END_OF_LINE,
- PROHIBIT_WORD_BREAK
- };
-
- /**
- * Basic Japanese word breaking information.
- *
- * Character classes used for word breaking:
- * <UL>
- * <LI>
- * SEVEN_BIT_CHAR - e.g. ASCII
- * <LI>
- * HALFWIDTH_PRONOUNCE_CHAR - e.g. Japanese Katakana
- * <LI>
- * FULLWIDTH_ASCII_CHAR - e.g. ASCII in JIS
- * <LI>
- * FULLWIDTH_PRONOUNCE_CHAR - e.g. Japanese Hiragana, Katakana
- * <LI>
- * KANJI_CHAR - ideographic
- * <LI>
- * UNCLASSIFIED_CHAR - others
- * </UL>
- *
- * @see INTL_CharClass
- */
- enum WORD_BREAK_CLASS{
- SEVEN_BIT_CHAR,
- HALFWIDTH_PRONOUNCE_CHAR,
- FULLWIDTH_ASCII_CHAR,
- FULLWIDTH_PRONOUNCE_CHAR,
- KANJI_CHAR,
- UNCLASSIFIED_CHAR
- };
- /**
- * Returns the code point that represents the non-breaking space character.
- *
- * The current implementation returns the same value regardless of the given
- * charset. However, the return value is platform dependent.
- * The information is then used by the parser and layout code.
- *
- * Use this function with caution, as it is tied to
- * the current HTML parser implementation.
- *
- * @param winCharSetID Specifies the window charset ID.
- * @return the code point of the non-breaking space, as a
- * C style NULL terminated string.
- */
- PUBLIC const char *INTL_NonBreakingSpace(
- uint16 winCharSetID
- );
-
- /**
- * Returns information for basic Japanese word breaking.
- *
- * Given a character pointer and a charset, returns the word breaking
- * character class of the given character.
- * It is necessary to pass a pointer because the
- * character may be more than one byte long.
- *
- * In the future, the definition of word breaking classes needs to be
- * extended.
- *
- * @param winCharSetID Specifies the window charset ID
- * @param pstr Specifies the pointer to the character
- * @return Character class for word breaking:
- * <UL>
- * <LI>
- * SEVEN_BIT_CHAR - e.g. ASCII
- * <LI>
- * HALFWIDTH_PRONOUNCE_CHAR - e.g. Japanese Katakana
- * <LI>
- * FULLWIDTH_ASCII_CHAR - e.g. ASCII in JIS
- * <LI>
- * FULLWIDTH_PRONOUNCE_CHAR - e.g. Japanese Hiragana, Katakana
- * <LI>
- * KANJI_CHAR - ideographic
- * <LI>
- * UNCLASSIFIED_CHAR - others
- * </UL>
- * @see INTL_KinsokuClass
- * @see WORD_BREAK_CLASS
- */
- PUBLIC int INTL_CharClass(
- int winCharSetID,
- unsigned char *pstr
- );
-
- /**
- * Returns line breaking information.
- *
- * Given a character pointer and a charset, returns the line breaking
- * character class of the given character.
- * It is necessary to pass a pointer because the
- * character may be more than one byte long.
- *
- *
- * Please note that this function currently only supports multibyte charsets.
- * If it is called for an ASCII charset, it always returns
- * PROHIBIT_WORD_BREAK.
- *
- * References for line breaking:
- * <UL>
- * <LI>
- * Japanese Standard Association,
- * JIS X 4501 1995 - Japanese Industrial Standard -
- * Line Composition rules for Japanese documents
- * <LI>
- * Ken Lunde,
- * Understanding Japanese Information Processing,
- * O'Reilly & Associates, Inc.,
- * ISBN:1-56592-043-0,
- * pp.148
- * <LI>
- * Nadine Kano,
- * Developing International Software For Windows 95 and Windows NT,
- * Microsoft Press,
- * ISBN:1-556-15-840-8,
- * pp.239-244
- * </UL>
- *
- *
- * @param winCharSetID Specifies the window charset ID.
- * @param pstr Specifies the pointer to the character
- * @return the kinsoku class for line breaking:
- * <UL>
- * <LI>
- * PROHIBIT_NOWHERE -
- * A breakable character. The line may be broken before
- * or after this character. This class is for all
- * Kanji ideographic characters.
- * <LI>
- * PROHIBIT_BEGIN_OF_LINE -
- * The character should not appear at the beginning of a line.
- * <LI>
- * PROHIBIT_END_OF_LINE -
- * The character should not appear at the end of a line.
- * <LI>
- * PROHIBIT_WORD_BREAK -
- * A non-breakable character. The line cannot be broken
- * if the next (or previous) character is also
- * PROHIBIT_WORD_BREAK.
- * </UL>
- * @see INTL_CharClass
- * @see LINE_WRAP_PROHIBIT_CLASS
- */
- PUBLIC int INTL_KinsokuClass(
- int16 winCharSetID,
- unsigned char *pstr
- );
-
- /**
- * Returns the column width of the given character.
- *
- * In some countries, old terminals use full-width and half-width characters.
- * This function returns the number of "columns" taken up by the given
- * character. For example, in Japan, normal characters take up 2 columns
- * each, while half-width characters take up 1 column each.
- *
- * Returns 1 for charsets that do not distinguish between half-width and
- * full-width characters.
- *
- * @param winCharSetID Specifies the charset ID of the text
- * @param pstr Specifies the pointer to the character
- * @return The column width of the given character
- * @see INTL_IsHalfWidth
- */
- PUBLIC int INTL_ColumnWidth(
- int winCharSetID,
- unsigned char *pstr
- );
-
- /**
- * Truncates a long string by replacing excess characters in the middle
- * with "...".
- *
- * The output_return pointer may be the same as the input pointer.
- *
- * @param winCharSetID Specifies the charset ID of the text
- * @param input Specifies the text to be mid-truncated
- * @param output_return Returns the mid-truncated text
- * @param max_length Specifies the desired number of bytes to be placed in
- * the output buffer, minus 1 for the null terminator
- */
- PUBLIC void INTL_MidTruncateString(
- int16 winCharSetID,
- const char *input,
- char *output_return,
- int max_length
- );
-
- /**
- * Returns whether or not the given character is a half-width character.
- *
- * In some countries, certain characters are normal width on old terminals,
- * while other characters are half-width. For example, normal Japanese
- * characters are considered normal width, while "hankaku kana" are
- * half-width, as are the ASCII characters.
- *
- * NOTE(review): the paragraph above calls ASCII characters half-width,
- * while the return value below is documented as 0 for ASCII - confirm
- * against the implementation.
- *
- * @param winCharSetID Specifies the charset ID of the text
- * @param pstr Specifies the pointer to the character
- * @return
- * 0 if the given character is ASCII or the charset does not normally
- * distinguish between half-width and full-width,
- * 1 if the given character is half-width
- * @see INTL_ColumnWidth
- */
- PUBLIC int INTL_IsHalfWidth(
- uint16 winCharSetID,
- unsigned char *pstr
- );
-
- /*@}*/
- /*=======================================================*/
- /**@name Document Context Handling */
- /*@{*/
- /**
- * Request a re-layout of the document.
- *
- * Libi18n calls this function in those cases where a different document
- * encoding is detected after document conversion and layout have begun.
- * This can occur because the parsing and layout of the document begin
- * immediately when the document data begins to stream in - at which time
- * all the data needed to determine the charset may not be available. If
- * this occurs, the layout engine needs to be notified to pull the data from
- * the source (cache) again so the data will be converted by the correct
- * character codeset conversion module in the data stream.
- *
- * @param context Specifies the context which should be laid out again.
- */
- PUBLIC void
- INTL_Relayout(iDocumentContext context);
-
- /**
- * Returns the name of the document charset.
- *
- * The returned string is suitable for use in the window brought up by
- * View | Page Info (previously known as Document Info). It also provides
- * information such as whether this charset was auto-detected.
- *
- * @param doc_context Specifies the document context
- * @return Name (string) of the document charset
- */
- PUBLIC char *INTL_CharSetDocInfo(
- iDocumentContext doc_context
- );
-
- /**
- * Get the UI charset encoding setting.
- *
- * Gets the currently selected charset encoding for this document
- * (not the global default and not the detected document encoding).
- *
- * @param context Specifies the document context
- * @return Document charset ID selected by the user
- */
- PUBLIC uint16 FE_DefaultDocCharSetID(
- iDocumentContext context
- );
-
- /**
- * Change the default document charset ID.
- *
- * This function is currently only implemented and called by the Windows
- * platform. It will be removed in the future to keep the platforms
- * consistent.
- *
- * The declaration is only compiled when XP_WIN or XP_OS2 is defined.
- *
- * @param defaultDocCharSetID Specifies the new default document charset ID
- * @version DEPRECATED. Do not use this function.
- */
- #if defined(XP_WIN) || defined(XP_OS2)
- PUBLIC void
- INTL_ChangeDefaultCharSetID(int16 defaultDocCharSetID);
- #endif
-
- /**
- * Returns the default charset, either from the user preference or from
- * the current encoding menu selection.
- *
- * @param context Specifies the context
- * @return Default document charset ID. If the context is NULL,
- * the default charset from the user preference is returned.
- * If the context is specified, the current encoding menu
- * selection is returned.
- */
- PUBLIC int16
- INTL_DefaultDocCharSetID(iDocumentContext context);
-
- /**
- * Returns the default window charset ID for the given document context.
- *
- * If context is NULL, or the context's window charset ID is zero, this
- * function calls INTL_DefaultWinCharSetID, passing the same context.
- *
- * NOTE(review): this summary is worded identically to the one for
- * INTL_DefaultWinCharSetID; confirm what distinguishes the "text attribute"
- * charset ID from the window charset ID.
- *
- * @param context Specifies the document context
- * @return The default window charset ID for this document context
- * @see INTL_DefaultWinCharSetID
- */
- PUBLIC int16 INTL_DefaultTextAttributeCharSetID(
- iDocumentContext context
- );
-
- /**
- * Returns the default window charset ID for the given document context.
- *
- * If context is NULL, or if the context's window charset ID is zero, this
- * function calls INTL_DefaultDocCharSetID, passing the same context, and then
- * calls INTL_DocToWinCharSetID on the result.
- *
- * @param context Specifies the document context
- * @return Default window charset ID for this document context
- * @see INTL_DefaultDocCharSetID
- * @see INTL_DocToWinCharSetID
- */
- PUBLIC int16 INTL_DefaultWinCharSetID(
- iDocumentContext context
- );
- /**
- * Set up the charset conversion stream module.
- *
- * This function gets the charset info object from the context, and then
- * picks up the relayout flag and the document charset ID before calling
- * INTL_CSIInitialize. It then creates the appropriate charset converter
- * to convert from the document charset to the window charset. The stream is
- * set up by setting the various function pointers (put, abort, complete,
- * etc). It then hooks up to the next stream module, "INTERNAL_PARSER" (the
- * HTML parser and layout engine). This is done by rewriting the
- * content_type field of URL_s.
- *
- * @param format_out Specifies the type of stream
- * @param data_obj Ignored
- * @param URL_s Specifies the URL object
- * @param window_id Specifies the context
- * @return Stream object corresponding to this charset conversion module
- * @see INTL_CSIInitialize
- * @see NET_StreamBuilder
- */
- PUBLIC Stream *INTL_ConvCharCode(
- int format_out,
- void *data_obj,
- URL *URL_s,
- iDocumentContext window_id
- );
-
- /**
- * Converts text from the mail charset to the display charset used by the
- * current window.
- *
- * It decides which display charset to use based on the current default
- * language. The caller is responsible for deallocating the memory.
- *
- * @param context Specifies the context (window ID).
- * @param bit7buff Specifies the source buffer.
- * @param block_size Specifies the length of the source buffer.
- * @return Destination buffer. NULL means that either the conversion failed
- * or a single-byte to single-byte conversion was done.
- */
- PUBLIC unsigned char *INTL_ConvMailToWinCharCode(
- iDocumentContext context,
- unsigned char *bit7buff,
- uint32 block_size
- );
-
- /*@}*/
- /*=======================================================*/
- /**@name Platform Independent String Resources */
- /*@{*/
- /**
- * Returns the charset name of the translated resources.
- *
- * @return MIME charset of the cross-platform string resources and the FE
- * resources
- * @see XP_GetString
- * @see XP_GetStringForHTML
- */
- PUBLIC char *
- INTL_ResourceCharSet(void);
-
- /*@}*/
- /*=======================================================*/
-
- XP_END_PROTOS
-
- #endif /* INTL_LIBI18N_H */
-