Tools

home *** CD-ROM | disk | FTP | other *** search

/ Tools / WinSN5.0Ver.iso / NETSCAP.50 / WIN1998.ZIP / ns / lib / libi18n / intlcomp.c < prev next >

Wrap

C/C++ Source or Header | 1998-04-08 | 11.0 KB | 379 lines

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL. You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All Rights
 * Reserved.
 */
/*  intlcomp.c  */
/*
    This file implements
        INTL_MatchOneChar
        INTL_MatchOneCaseChar
        INTL_Strstr
        INTL_Strcasestr
    as well as the normalized-string helpers
        INTL_GetNormalizeStr
        INTL_GetNormalizeStrFromRFC1522 (MOZ_MAIL_NEWS only)
        INTL_StrContains
        INTL_StrIs
        INTL_StrBeginWith
        INTL_StrEndWith
*/
#include "intlpriv.h"
#include "pintlcmp.h"

/*
 * Debug-time sanity check on a charset id.  Callers must know the real
 * csid of their text: CS_UNKNOWN, CS_DEFAULT and CS_ASCII are all rejected.
 * Wrapped in do/while(0) so the macro behaves as a single statement.
 */
#define CHECK_CSID_AND_ASSERT(csid) \
    do { \
        XP_ASSERT(CS_UNKNOWN != (csid)); \
        XP_ASSERT(CS_DEFAULT != (csid)); \
        XP_ASSERT(CS_ASCII != (csid)); \
    } while (0)

/* Private Function Prototype */
extern unsigned char lower_lookup_ascii[];

/*
 * Lower-case one byte.  Bytes with the high bit set are looked up in the
 * charset specific table `lower` (indexed by the low 7 bits); all other
 * bytes are looked up in lower_lookup_ascii.
 * NOTE: `ch` is evaluated more than once - do not pass an expression with
 * side effects.
 */
#define INTL_SingleByteToLower(lower, ch) \
    (((ch) & 0x80) ? ((lower)[((ch) & 0x7f)]) : (lower_lookup_ascii[(ch)]))

/*
 * NOTE(review): this prototype uses MODULE_PRIVATE while the definition
 * below uses PRIVATE.  If the two macros expand to different linkage the
 * declarations conflict - confirm against the project headers.
 */
MODULE_PRIVATE void INTL_DoubleByteToLower(DoubleByteToLowerMap *db_tolowermap,
                                           unsigned char *lowertext,
                                           unsigned char *text);

/*
 * intl_strip_CRLF
 * Remove every CR and LF byte from `str`, in place.  The remaining bytes
 * are compacted towards the front and the string is re-terminated.
 */
PRIVATE void intl_strip_CRLF(unsigned char* str)
{
    unsigned char* in;
    unsigned char* out;

    for(in = out = str; 0 != *in; in++)
    {
        if((CR != *in) && (LF != *in))
            *out++ = *in;
    }
    *out = 0;
}

/*
 * intl_caseless_normalize
 * This function has the side effect of modifying the string it is given:
 * CR/LF bytes are stripped, then every one-byte and two-byte character is
 * replaced by its lower-case form according to the tables for `csid`.
 * Characters of three or more bytes are validated but left unchanged.
 *
 * If a truncated multibyte character is found (which happens when the
 * wrong csid is used) the function asserts and stops, leaving the rest of
 * the string as it is.
 */
PRIVATE void intl_caseless_normalize(int16 csid, unsigned char* str)
{
    unsigned char *sb_tolowermap = INTL_GetSingleByteToLowerMap(csid);
    unsigned char *p;

    CHECK_CSID_AND_ASSERT(csid);
    XP_ASSERT(NULL != str);

    intl_strip_CRLF(str);
    if(SINGLEBYTE == INTL_CharSetType(csid))
    {
        /* for singlebyte csid */
        for(p = str; *p != 0; p++)
            *p = INTL_SingleByteToLower(sb_tolowermap, *p);
        return;
    }
    else
    {
        /* for multibyte csid */
        DoubleByteToLowerMap *db_tolowermap = INTL_GetDoubleByteToLowerMap(csid);
        int l;
        int i;

        for(p = str; *p != 0; p += l)
        {
            l = INTL_CharLen(csid, p);  /* *** FIX ME: IMPROVE PERFORMANCE */
            switch(l)
            {
                case 1:
                    *p = INTL_SingleByteToLower(sb_tolowermap, *p);
                    break;

                case 2:
                    if(0 == *(p+1))
                    {
                        /* Check whether we hit a partial character. This
                           happens when we use the wrong csid. However, we
                           must not pass the array boundary even then. */
                        XP_ASSERT(FALSE);
                        return;  /* got a partial character, return */
                    }
                    INTL_DoubleByteToLower(db_tolowermap, p, p);
                    break;

                default:
                    if(l < 1)
                    {
                        /* Defensive: a non-positive length would never
                           advance the loop. */
                        XP_ASSERT(FALSE);
                        return;
                    }
                    /* Check whether we hit a partial character. This
                       happens when we use the wrong csid. Scan FORWARD
                       from the byte after the lead byte so that we stop
                       at the terminator instead of reading the bytes
                       that follow it. */
                    for(i = 1; i < l; i++)
                    {
                        if(0 == p[i])
                        {
                            XP_ASSERT(FALSE);
                            return;  /* got a partial character, return */
                        }
                    }
                    /* We currently do not handle 3 byte normalization.
                       We need to work on this for UTF8 */
                    break;
            }
        }
    }
}

/*
 * INTL_DoubleByteToLower
 * Lower-case one two-byte character.
 *
 *   db_tolowermap  table of ranges, terminated by an entry whose src_b1
 *                  and src_b2_start are both 0.  The early-exit tests
 *                  below rely on the table being sorted by
 *                  (src_b1, src_b2_start).
 *   lowertext      receives the two result bytes (may alias `text`)
 *   text           the two source bytes
 *
 * If no range covers the character the two bytes are copied unchanged.
 */
PRIVATE void INTL_DoubleByteToLower(DoubleByteToLowerMap *db_tolowermap, unsigned char* lowertext, unsigned char* text)
{
    DoubleByteToLowerMap *p;

    for(p = db_tolowermap; !((p->src_b1 == 0) && (p->src_b2_start == 0)); p++)
    {
        if( (p->src_b1 == text[0]) &&
            (p->src_b2_start <= text[1]) &&
            (p->src_b2_end >= text[1]) )
        {
            lowertext[0] = p->dest_b1;
            lowertext[1] = text[1] - p->src_b2_start + p->dest_b2_start;
            return;
        }
        else
        {
            /* The map has to be in sorted order to implement a fast search */
            if(p->src_b1 > text[0])
                break;
            else
            {
                if((p->src_b1 == text[0]) && (p->src_b2_start > text[1]))
                    break;
            }
        }
    }
    lowertext[0] = text[0];
    lowertext[1] = text[1];
    return;
}

/*
 * INTL_MatchOneChar
 * Compare the first character of `text1` and `text2`, ignoring case for
 * one-byte and two-byte characters.  Longer characters are compared byte
 * for byte.
 *
 *   charlen  out: byte length of the character at `text1`.  It is now
 *            also set when the two characters differ in length.
 *
 * Returns TRUE when the characters match, FALSE otherwise.
 */
PUBLIC XP_Bool INTL_MatchOneChar(int16 csid, unsigned char *text1,unsigned char *text2,int *charlen)
{
    if((INTL_CharSetType(csid) == SINGLEBYTE))
    {
        unsigned char *sb_tolowermap;
        *charlen = 1;
        sb_tolowermap = INTL_GetSingleByteToLowerMap(csid);
        return( INTL_SingleByteToLower(sb_tolowermap, text1[0]) ==
                INTL_SingleByteToLower(sb_tolowermap, text2[0]));
    }
    else
    {
        int l1, l2;
        l1 = INTL_CharLen(csid, text1);  /* *** FIX ME: IMPROVE PERFORMANCE */
        l2 = INTL_CharLen(csid, text2);  /* *** FIX ME: IMPROVE PERFORMANCE */

        /* Always report the length, even on the mismatch path, so the
           caller never sees an unset out-parameter. */
        *charlen = l1;
        if(l1 != l2)
            return FALSE;

        if(l1 == 1)
        {
            unsigned char *sb_tolowermap;
            sb_tolowermap = INTL_GetSingleByteToLowerMap(csid);
            return( INTL_SingleByteToLower(sb_tolowermap, text1[0]) ==
                    INTL_SingleByteToLower(sb_tolowermap, text2[0]));
        }
        else
        {
            if(l1 == 2)
            {
                DoubleByteToLowerMap *db_tolowermap;
                unsigned char lowertext1[2], lowertext2[2];
                db_tolowermap = INTL_GetDoubleByteToLowerMap(csid);
                INTL_DoubleByteToLower(db_tolowermap, lowertext1, text1);
                INTL_DoubleByteToLower(db_tolowermap, lowertext2, text2);
                return( ( lowertext1[0] == lowertext2[0] ) &&
                        ( lowertext1[1] == lowertext2[1] ) );
            }
            else
            {
                /* for a character which is neither one byte nor two
                   bytes, we cannot ignore case */
                int i;
                for(i = 0; i < l1; i++)
                {
                    if(text1[i] != text2[i])
                        return FALSE;
                }
                return TRUE;
            }
        }
    }
}

/*
 * INTL_MatchOneCaseChar
 * Case-sensitive comparison of the first character of `text1` and
 * `text2`.
 *
 *   charlen  out: byte length of the character at `text1` (always 1 for a
 *            single-byte csid).
 *
 * Returns TRUE when all `*charlen` bytes are equal, FALSE otherwise.
 */
PUBLIC XP_Bool INTL_MatchOneCaseChar(int16 csid, unsigned char *text1,unsigned char *text2,int *charlen)
{
    if((INTL_CharSetType(csid) == SINGLEBYTE))
    {
        *charlen = 1;
        return( text1[0] == text2[0]);
    }
    else
    {
        int i, len;
        *charlen = len = INTL_CharLen(csid, (unsigned char *) text1);  /* *** FIX ME: IMPROVE PERFORMANCE */
        for(i = 0; i < len; i++)
        {
            if(text1[i] != text2[i])
                return FALSE;
        }
        return TRUE;
    }
}

/*
 * INTL_Strstr
 * Case-sensitive, character-aware substring search.
 *
 * Returns a pointer to the first occurrence of `s2` in `s1` that starts
 * on a character boundary, `s1` itself when `s2` is NULL or empty, and
 * NULL when `s1` is NULL/empty or no match is found.
 */
PUBLIC char *INTL_Strstr(int16 csid, const char *s1,const char *s2)
{
    int len;
    char *p1, *pp1, *p2;

    if((s2 == NULL) || (*s2 == '\0'))
        return (char *)s1;
    if((s1 == NULL) || (*s1 == '\0'))
        return NULL;

    for(p1 = (char*)s1; *p1; p1 = INTL_NextChar(csid, p1))  /* *** FIX ME: IMPROVE PERFORMANCE */
    {
        /* `len` is only consumed after a successful match, which is when
           INTL_MatchOneCaseChar has stored it. */
        for(p2 = (char*)s2, pp1 = p1;
            ((*pp1) && (*p2) && INTL_MatchOneCaseChar(csid, (unsigned char*)pp1, (unsigned char*)p2, &len));
            pp1 += len, p2 += len)  /* *** FIX ME: IMPROVE PERFORMANCE */
            ;  /* do nothing in the loop */
        if(*p2 == '\0')
            return p1;
    }
    return NULL;
}

/*
 * INTL_Strcasestr
 * Same as INTL_Strstr, but characters are compared with INTL_MatchOneChar
 * (case is ignored for one-byte and two-byte characters).
 *
 * To Do: We should take advantage of INTL_GetNormalizeStr to improve the
 * performance of this
 */
PUBLIC char *INTL_Strcasestr(int16 csid, const char *s1, const char *s2)
{
    int len;
    char *p1, *pp1, *p2;

    if((s2 == NULL) || (*s2 == '\0'))
        return (char *)s1;
    if((s1 == NULL) || (*s1 == '\0'))
        return NULL;

    for(p1 = (char*)s1; *p1; p1 = INTL_NextChar(csid, p1))  /* *** FIX ME: IMPROVE PERFORMANCE */
    {
        for(p2 = (char*)s2, pp1 = p1;
            ((*pp1) && (*p2) && INTL_MatchOneChar(csid, (unsigned char*)pp1, (unsigned char*)p2, &len));
            pp1 += len, p2 += len)  /* *** FIX ME: IMPROVE PERFORMANCE */
            ;  /* do nothing in the loop */
        if(*p2 == '\0')
            return p1;
    }
    return NULL;
}

/*
 * INTL_GetNormalizeStr
 * Make a copy of `str` with StrAllocCopy and normalize the copy with
 * intl_caseless_normalize.  `str` itself is not modified.
 *
 * Returns the normalized copy, or NULL when the copy could not be made.
 * NOTE(review): the caller presumably owns and frees the result - confirm.
 */
PUBLIC unsigned char* INTL_GetNormalizeStr(int16 csid, unsigned char* str)
{
    char* n_str = NULL;

    StrAllocCopy(n_str, (char*) str);
    XP_ASSERT(n_str);  /* Should only come here if Memory Not Enough */
    CHECK_CSID_AND_ASSERT(csid);
    if(NULL != n_str)
        intl_caseless_normalize(csid, (unsigned char*)n_str);
    return (unsigned char*)n_str;
}

#ifdef MOZ_MAIL_NEWS
/*
 * INTL_GetNormalizeStrFromRFC1522
 * Decode `rfc1522header` with INTL_DecodeMimePartIIStr (falling back to a
 * plain copy when the decoder returns NULL), then normalize the result
 * with intl_caseless_normalize.
 *
 * Returns the normalized string, or NULL when no copy could be made.
 * NOTE(review): the caller presumably owns and frees the result - confirm.
 */
PUBLIC unsigned char* INTL_GetNormalizeStrFromRFC1522(int16 csid, unsigned char* rfc1522header)
{
    char* n_header = (char*) INTL_DecodeMimePartIIStr((char*)rfc1522header, csid, FALSE);

    if(NULL == n_header)  /* INTL_DecodeMimePartIIStr() may return NULL - Mean no conversion */
        StrAllocCopy(n_header, (char*) rfc1522header);
    XP_ASSERT(n_header);  /* Should only come here if Memory Not Enough */
    CHECK_CSID_AND_ASSERT(csid);
    if(NULL != n_header)
        intl_caseless_normalize(csid, (unsigned char*)n_header);
    return (unsigned char*)n_header;
}
#endif /* MOZ_MAIL_NEWS */

/*
 * INTL_StrContains
 * Returns TRUE when `normalizedSubstr` occurs in `normalizedStr` starting
 * on a character boundary.
 *
 * It is the caller's responsibility to make sure both strings are
 * normalized by calling INTL_GetNormalizeStr() or
 * INTL_GetNormalizeStrFromRFC1522().
 *
 * NOTE(review): an empty `normalizedStr` yields FALSE even for an empty
 * `normalizedSubstr`, unlike INTL_Strstr - confirm this is intended.
 */
PUBLIC XP_Bool INTL_StrContains(
    int16 strcsid,
    unsigned char* normalizedStr,
    unsigned char* normalizedSubstr)
{
    char* p;
    int l_char;
    int l_idx;
    int l_substr = XP_STRLEN((char*) normalizedSubstr);
    int l_str = XP_STRLEN((char*) normalizedStr);

    CHECK_CSID_AND_ASSERT(strcsid);

    /* The index bound is tested BEFORE *p is read: stepping over a
       truncated trailing multibyte character can move p past the
       terminator, and that byte must not be dereferenced. */
    for(p = (char*)normalizedStr, l_idx = 0, l_char = 0;
        (l_idx < l_str) && (0 != *p);
        p += l_char, l_idx += l_char)
    {
        l_char = INTL_CharLen(strcsid, (unsigned char*)p);  /* *** FIX ME: Should do better tune for performance here */
        if(0 == XP_STRNCMP(p, (char*) normalizedSubstr, l_substr))
            return TRUE;
    }
    return FALSE;
}

/*
 * INTL_StrIs
 * Returns TRUE when the two normalized strings are byte-for-byte equal.
 *
 * It is the caller's responsibility to make sure both strings are
 * normalized by calling INTL_GetNormalizeStr() or
 * INTL_GetNormalizeStrFromRFC1522().
 */
PUBLIC XP_Bool INTL_StrIs(
    int16 strcsid,
    unsigned char* normalizedStr,
    unsigned char* normalizedSubstr)
{
    CHECK_CSID_AND_ASSERT(strcsid);
    return (0 == XP_STRCMP((char*) normalizedStr, (char*) normalizedSubstr));
}

/*
 * INTL_StrBeginWith
 * Returns TRUE when `normalizedStr` starts with `normalizedSubstr`.
 *
 * It is the caller's responsibility to make sure both strings are
 * normalized by calling INTL_GetNormalizeStr() or
 * INTL_GetNormalizeStrFromRFC1522().
 */
PUBLIC XP_Bool INTL_StrBeginWith(
    int16 strcsid,
    unsigned char* normalizedStr,
    unsigned char* normalizedSubstr)
{
    CHECK_CSID_AND_ASSERT(strcsid);
    return (0 == XP_STRNCMP((char*) normalizedStr,
                            (char*) normalizedSubstr,
                            XP_STRLEN((char*) normalizedSubstr)));
}

/*
 * INTL_StrEndWith
 * Returns TRUE when `normalizedStr` ends with `normalizedSubstr` and the
 * suffix starts on a character boundary of `normalizedStr`.
 *
 * It is the caller's responsibility to make sure both strings are
 * normalized by calling INTL_GetNormalizeStr() or
 * INTL_GetNormalizeStrFromRFC1522().
 */
PUBLIC XP_Bool INTL_StrEndWith(
    int16 strcsid,
    unsigned char* normalizedStr,
    unsigned char* normalizedSubstr)
{
    char* p;
    int l_char;
    int l_idx;
    int l_substr = XP_STRLEN((char*) normalizedSubstr);
    int l_str = XP_STRLEN((char*) normalizedStr);
    int l_stop = l_str - l_substr;  /* byte offset where the suffix must begin */

    CHECK_CSID_AND_ASSERT(strcsid);

    /* Walk character by character up to l_stop.  The index bound is
       tested BEFORE *p is read so that overshooting the terminator (a
       truncated multibyte character) never dereferences p. */
    for(p = (char*)normalizedStr, l_idx = 0, l_char = 0;
        (l_idx < l_stop) && (0 != *p);
        p += l_char, l_idx += l_char)
        l_char = INTL_CharLen(strcsid, (unsigned char*)p);  /* *** FIX ME: Should do better tune for performance here */

    /* l_stop is negative (substring longer than string) or falls inside
       a character: no match. */
    if(l_idx != l_stop)
        return FALSE;
    return (0 == XP_STRCMP(p, (char*) normalizedSubstr));
}