home *** CD-ROM | disk | FTP | other *** search
- // utf8.cpp
- //
- // Created 12/09/97
- //
- // (C)Copyright 1997-1999 Microsoft Corporation, All rights reserved.
- //
-
- #include "pch.hpp"
- #pragma hdrstop
-
- #include "utf8.hpp"
- #include "ansibuf.hpp"
-
-
- // Returns number of characters in a Utf8 string that's assumed to be
- // terminated with a single 0 byte (Our constant pool always stores strings
- // this way.)
- DWORD Utf8NumChars(LPCSTR pszUtf8)
- {
- LPCSTR p;
- DWORD nChars = 0;
- unsigned char c;
- p = pszUtf8;
- while ( '\0' != (c = *((unsigned char*)(p)))) {
- nChars++;
- if (!(c & 0x80)) {
- // Normal single-byte character.
- p++;
- } else if (0xc0 == (c & 0xe0)) {
- // Double-byte char
- p += 2;
- } else {
- ASSERT(0xe0 == (c & 0xf0));
- p += 3;
- }
- }
- return nChars;
- }
-
- //----------------------------------------------------------------------
- // Utf8ToUnicode
- //
- // Converts the first cbUtf characters of a utf8 string to its unicode
- // equivalent. pwsz must be large enough to hold the unicode string.
- // This function does NOT put a null-terminater at the end of the buffer.
- //
-
- VOID Utf8ToUnicode(
- LPCSTR pUtf,
- int cbUtf,
- LPWSTR pwsz )
- {
- unsigned char c;
-
- while( cbUtf-- > 0 )
- {
- c = *(unsigned char *)pUtf;
-
- if( !(c & 0x80) )
- {
- // Normal single-byte character.
- *pwsz = (WCHAR)c;
- pUtf++;
- }
- else if( 0xc0 == (c & 0xe0) )
- {
- // Double-byte char
- unsigned char c2 = *(unsigned char *)(pUtf + 1);
- ASSERT( 0x80 == (c2 & 0xc0) );
-
- *pwsz = (WCHAR)(( (WCHAR)((WCHAR)(c & 0x1f)) << 6 ) |
- ( ((WCHAR)(c2 & 0x3f)) ));
-
- pUtf += 2;
- }
- else
- {
- unsigned char c2 = *(unsigned char*)(pUtf + 1);
- unsigned char c3 = *(unsigned char*)(pUtf + 2);
- ASSERT( 0xe0 == (c & 0xf0) );
- ASSERT( 0x80 == (c2 & 0xc0) );
- ASSERT( 0x80 == (c2 & 0xc0) );
-
- *pwsz = (WCHAR)(( (WCHAR)((WCHAR)(c & 0x0f)) << 12 ) |
- ( (WCHAR)((WCHAR)(c2 & 0x3f)) << 6 ) |
- ( ((WCHAR)(c3 & 0x3f)) ));
-
-
- pUtf += 3;
- }
-
- pwsz++;
- }
- }
-
- //----------------------------------------------------------------------
- // Utf8ToUnicode
- //
- // Converts a utf8 string to its unicode equivalent, returning a
- // dynamically allocated buffer.
- //
-
- HRESULT Utf8ToUnicode(
- LPCSTR pUtf,
- LPWSTR *ppwsz)
- {
- HRESULT hr;
- UINT cbUtf = Utf8NumChars(pUtf);
-
- *ppwsz = new(WCHAR[cbUtf+1]);
-
- if (*ppwsz)
- {
- Utf8ToUnicode( pUtf, cbUtf, *ppwsz );
- (*ppwsz)[cbUtf] = L'\0';
- hr = S_OK;
- }
- else
- hr = E_OUTOFMEMORY;
-
- return hr;
- }
-
- //----------------------------------------------------------------------
- // Utf8ToUnicode
- //
- // Converts a utf8 string to its unicode equivalent, returning a
- // dynamically allocated buffer.
- //
-
- HRESULT CoUtf8ToUnicode(
- LPCSTR pUtf,
- LPWSTR *ppwsz)
- {
- HRESULT hr;
- UINT cbUtf = Utf8NumChars(pUtf);
-
- *ppwsz = (LPWSTR)CoTaskMemAlloc(sizeof(WCHAR)*(cbUtf+1));
-
- if (*ppwsz)
- {
- Utf8ToUnicode( pUtf, cbUtf, *ppwsz );
- (*ppwsz)[cbUtf] = L'\0';
- hr = S_OK;
- }
- else
- hr = E_OUTOFMEMORY;
-
- return hr;
- }
-
-
- HRESULT Utf8ToAnsi(
- PCUTF8 pUtf,
- LPSTR *ppAnsi)
- {
- HRESULT hr = E_OUTOFMEMORY;
- int ncLen;
-
- *ppAnsi = NULL;
-
- DWORD dwStrLen = Utf8NumChars( pUtf );
- WCHAR *wszSimpleName = new( WCHAR[ dwStrLen + 1 ] ); // add one for NULL terminator
-
- if (!wszSimpleName) {
- goto exit;
- }
-
- Utf8ToUnicode( pUtf, dwStrLen, wszSimpleName );
-
- ncLen = WideCharToMultiByte(CP_ACP, 0, wszSimpleName, dwStrLen, NULL, 0, NULL, NULL);
- if (ncLen < 0) {
- hr = E_UNEXPECTED;
- goto exit;
- }
-
- *ppAnsi = new(CHAR[ncLen + 1]);
- if (!*ppAnsi)
- goto exit;
-
- if( WideCharToMultiByte( CP_ACP, 0, wszSimpleName, dwStrLen,
- *ppAnsi, ncLen, NULL, NULL ) >= 0 )
- {
- (*ppAnsi)[ ncLen ] = '\0'; // NULL-terminate
- } else {
- delete(*ppAnsi);
- *ppAnsi = NULL;
- hr = E_UNEXPECTED;
- goto exit;
- }
-
- hr = S_OK;
-
- exit:
- if (wszSimpleName)
- delete wszSimpleName;
-
- return hr;
- }
-
-
- //------------------------------------------------------------------------
-
-
- BOOL ANSIStringBuffer::EnsureSpace (int needed)
- {
- int newlen = m_len + needed;
- if (newlen < m_size)
- return TRUE;
-
- int newsize = (m_size+1)*2;
- if (newsize < newlen)
- newsize = newlen+1;
- PSTR newstr = new(CHAR[newsize]);
- if (newstr != NULL)
- {
- if (m_str != NULL)
- {
- CopyMemory(newstr, m_str, m_len+1);
- delete(m_str);
- }
- m_str = newstr;
- m_size = newsize;
- return TRUE;
- }
- return FALSE;
- }
-
-
- BOOL ANSIStringBuffer::AppendUtf8 (PCSTR pUtf)
- {
- BOOL ret = FALSE;
-
- DWORD dwStrLen = Utf8NumChars( pUtf );
- WCHAR *wszSimpleName = new( WCHAR[ dwStrLen + 1 ] ); // add one for NULL terminator
-
- if (wszSimpleName != NULL)
- {
- Utf8ToUnicode( pUtf, dwStrLen, wszSimpleName );
-
- wszSimpleName[dwStrLen] = L'\0';
-
- ret = Append(wszSimpleName);
-
- delete wszSimpleName;
- }
-
- return ret;
- }
-
-