home *** CD-ROM | disk | FTP | other *** search
- /************************************************************************\
- *
- * MODULE : FILE_HTMLREMOVE.C
- *
- * PROGRAMS : HTMLENRM
- *
- * PURPOSE : Conversion of source file to target file replacing HTML
- * entities by target code page characters and supporting
- * variables and functions
- *
- \************************************************************************/
-
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "FILE_HTMLREMOVE.H"
#include "CONV_SUB.H"
-
/************************************************************************\
*  Conversion mode setting
*    normal   default
*    loose    replace HTML entities corresponding to characters with
*             accents that are not defined in the target code page by
*             characters without accents
*    all      the same as "loose", and also convert the special HTML
*             entities for & > < and " (amp, gt, lt, quot)
\************************************************************************/
-
/* Conversion modes (see the mode description above). */
enum {normal, loose, all};

/* Currently selected conversion mode; "normal" until a setter is called. */
static int ConvMode = normal;

/* Store the requested conversion mode. */
static void SelectConversionMode (int mode)
{
  ConvMode = mode;
}

/* Select the default conversion mode. */
void SetNormalConversion (void)
{
  SelectConversionMode(normal);
}

/* Select the "loose" conversion mode. */
void SetLooseConversion (void)
{
  SelectConversionMode(loose);
}

/* Select the "all" conversion mode. */
void SetAllConversion (void)
{
  SelectConversionMode(all);
}
-
/************************************************************************\
*  Time mode setting
*    current    set date/time of the target file to the current time
*    original   set date/time of the target file to be the same as
*               that of the source (original) file
\************************************************************************/
-
- enum {original, current};
-
- static int TimeMode = original;
- FILETIME CurrentTime;
-
- void SetOriginalFileTime (void)
- {
- TimeMode=original;
- }
-
- void SetCurrentFileTime (void)
- {
- TimeMode=current;
- CoFileTimeNow(&CurrentTime);
- }
-
- /************************************************************************\
- * Encoding setting
- * EncDir encoding directory (directory of the code page tables)
- * CodePage target code page
- \************************************************************************/
-
/* Directory that holds the code page tables, stored without trailing
   backslashes. */
static char EncDir[_MAX_PATH];

/* Name of the target code page table (file name without extension). */
static char CodePage[_MAX_FNAME]={"CP1250"};

/*
 * Remember the directory of the code page tables.
 *
 * directory - path to store; it is truncated to fit EncDir and any
 *             trailing backslashes are removed.  A NULL pointer is
 *             ignored and leaves the current setting unchanged.
 */
void SetEncodingDirectory (char *directory)
{
  size_t n;

  if (directory==NULL)
    return;

  strncpy(EncDir,directory,sizeof(EncDir)-1);
  EncDir[sizeof(EncDir)-1]='\0';

  /* EncDir[n] is the terminator, so the last character is EncDir[n-1]. */
  n=strlen(EncDir);
  while (n>0 && EncDir[n-1]=='\\')
    EncDir[--n]='\0';
}

/*
 * Remember the name of the target code page.
 *
 * cp - code page name; it is truncated to fit CodePage.  A NULL pointer
 *      is ignored and leaves the current setting unchanged.
 */
void SetTargetCodePage (char *cp)
{
  if (cp==NULL)
    return;

  strncpy(CodePage,cp,sizeof(CodePage)-1);
  CodePage[sizeof(CodePage)-1]='\0';
}
-
- /************************************************************************\
- * Directory for temporary files
- \************************************************************************/
-
- static char TempDir[_MAX_PATH]={""};
-
- /************************************************************************\
- * Conversion tables
- * Conv0,nConv0 structure for creation of conversion table
- * Conv,nConv conversion table
- \************************************************************************/
-
- static struct {
- char entity[20];
- char description[30];
- char loosestring[20];
- char string[20];
- BOOL use;
- } Conv0[384];
-
- static struct {
- int ncen;
- char entity[20];
- int ncst;
- char string[20];
- } Conv[384];
-
- static int nConv0=0;
- static int nConv=0;
-
- /************************************************************************\
- *
- * FUNCTION : InitConversionTable
- *
- * LOCAL VARS : fname - code page table filename
- * f - code page table file handle
- * line - buffer for code page table line
- * entity - HTML entity
- * description - character description
- * loosestring - string for character replace if there isn't
- * target character
- * ichar - character code
- *
- \************************************************************************/
-
- BOOL InitConversionTable (void)
- {
- int i,n,ichar;
- char entity[20],description[30],loosestring[20],line[100];
- char fname[_MAX_FNAME];
- FILE *f;
-
- if (GetTempPath((DWORD)sizeof(TempDir),TempDir)==0)
- return (FALSE);
-
- // HTML entities table reading
-
- strcat(strcpy(fname,EncDir),"\\HTML.ENC");
- f=fopen(fname,"rt");
- if (f==NULL)
- {
- printf("\n Error: Unable to find %s",fname);
- return (FALSE);
- }
- while(!feof(f))
- {
- if (fgets(line,100,f)==NULL)
- break;
- if (line[0]==';')
- continue;
- line[99]='\0';
- memset(entity,0,20);
- memset(description,0,30);
- sscanf(line,"%s %s",entity,description);
- n=strlen(entity);
- if (entity[0]!='&' || entity[n-1]!=';')
- continue;
- if (ConvMode!=all)
- if (strcmp(entity,"&")==0 || strcmp(entity,"<")==0 ||
- strcmp(entity,"<")==0)
- continue;
- Conv0[nConv0].use=FALSE;
- strcpy(Conv0[nConv0].entity,entity);
- strcpy(Conv0[nConv0].description,description);
- memset(Conv0[nConv0].string,0,20);
- memset(Conv0[nConv0].loosestring,0,20);
- nConv0++;
- }
- fclose(f);
-
- // HTML coded character set creating based on ISO-8859-1 table
-
- strcat(strcpy(fname,EncDir),"\\ISO-8859-1.ENC");
- f=fopen(fname,"rt");
- if (f==NULL)
- {
- printf("\n Warning: Unable to find %s"
- "\n Numerical entities will not be replaced.",fname);
- goto accent;
- }
- while(!feof(f))
- {
- if (fgets(line,100,f)==NULL)
- break;
- if (line[0]==';')
- continue;
- line[99]='\0';
- memset(entity,0,20);
- memset(description,0,30);
- sscanf(line,"%d %s",&ichar,description);
- if (strlen(description)<1)
- continue;
- sprintf(entity,"%d;",ichar);
- Conv0[nConv0].use=FALSE;
- strcpy(Conv0[nConv0].entity,entity);
- strcpy(Conv0[nConv0].description,description);
- memset(Conv0[nConv0].string,0,20);
- memset(Conv0[nConv0].loosestring,0,20);
- nConv0++;
- }
- fclose(f);
-
- // not-accented strings assignment to accented characters
-
- accent:
- if (ConvMode!=normal)
- {
- strcat(strcpy(fname,EncDir),"\\ACCENT.CONV");
- f=fopen(fname,"rt");
- if (f==NULL)
- {
- printf("\n Warning: Unable to find %s"
- "\n Remaining accent character entities will not be replaced",
- fname);
- goto readcp;
- }
- while(!feof(f))
- {
- if (fgets(line,100,f)==NULL)
- break;
- if (line[0]==';')
- continue;
- line[99]='\0';
- memset(description,0,30);
- memset(loosestring,0,20);
- sscanf(line,"%s %s",description,loosestring);
- if (strlen(description)<1)
- continue;
- for (i=0; i<nConv0; i++)
- {
- if (strcmp(description,Conv0[i].description)==0)
- strcpy(Conv0[i].loosestring,loosestring);
- break;
- }
- }
- fclose(f);
- }
-
- // target code page table reading and assignment to the HTML entities
-
- readcp:
- strcat(strcat(strcat(strcpy(fname,EncDir),"\\"),CodePage),".ENC");
- f=fopen(fname,"rt");
- if (f==NULL)
- {
- printf("\n Error: Unable to find %s",fname);
- return (FALSE);
- }
- while(!feof(f))
- {
- if (fgets(line,100,f)==NULL)
- break;
- if (line[0]==';')
- continue;
- line[99]='\0';
- memset(description,0,30);
- sscanf(line,"%d %s",&ichar,description);
- if (strlen(description)<1)
- continue;
- for(i=0; i<nConv0; i++)
- {
- if (Conv0[i].use==TRUE)
- continue;
- if (strcmp(description,Conv0[i].description)==0)
- {
- Conv0[i].string[0]=(char)ichar;
- Conv0[i].use=TRUE;
- }
- }
- }
- fclose(f);
-
- // decision of which loose conversion to use
-
- if (ConvMode!=normal)
- {
- for (i=0; i<nConv0; i++)
- {
- if (Conv0[i].use==TRUE)
- continue;
- if (strlen(Conv0[i].loosestring)>0)
- {
- strcpy(Conv0[i].string,Conv0[i].loosestring);
- Conv0[i].use=TRUE;
- }
- }
- }
-
- // finishing file conversion table
-
- nConv=0;
- for (i=0; i<nConv0; i++)
- if (Conv0[i].use==TRUE)
- {
- strcpy (Conv[nConv].entity,Conv0[i].entity);
- strcpy (Conv[nConv].string,Conv0[i].string);
- Conv[nConv].ncen=strlen(Conv[nConv].entity);
- Conv[nConv].ncst=strlen(Conv[nConv].string);
- nConv++;
- }
-
- return (TRUE);
- }
-
- /************************************************************************\
- *
- * FUNCTION : ConvertFile
- *
- * INPUTS : SourceDir - source file name
- * TargetDir - target file name
- *
- * RETURNS : None
- *
- * LOCAL VARS : hSFile - source file handle
- * hTFile - target file handle
- * hTempFile - temporary file handle
- * TempName - temporary file name
- * ssize - source file size
- * stime - source file time
- * buff - buffer for source file content
- *
- \************************************************************************/
-
- void ConvertFile(char *SFileName, char *TFileName)
- {
- HANDLE hSFile, hTFile, hTempFile;
- char *buff,*p0,*p1,*p2;
- char TempName[_MAX_FNAME];
- DWORD dwBytesRead,dwBytesWritten,dwPos;
- DWORD ssize,n;
- FILETIME stime;
- int i;
-
- if (GetTempFileName(TempDir,"HTR",0,TempName)==0)
- return;
- hTempFile= CreateFile(TempName,GENERIC_WRITE,0,
- (LPSECURITY_ATTRIBUTES)NULL,CREATE_ALWAYS,
- FILE_ATTRIBUTE_NORMAL,(HANDLE)NULL);
- if (hTempFile==INVALID_HANDLE_VALUE)
- return;
-
- hSFile= CreateFile(SFileName,GENERIC_READ,0,
- (LPSECURITY_ATTRIBUTES) NULL,OPEN_EXISTING,
- FILE_ATTRIBUTE_NORMAL,(HANDLE)NULL);
- if (hSFile==INVALID_HANDLE_VALUE)
- return;
- ssize=GetFileSize(hSFile,NULL);
- GetFileTime(hSFile,NULL,NULL,&stime);
-
- buff=malloc(ssize);
- if (buff==NULL)
- return;
- ReadFile(hSFile, (LPSTR)buff, ssize, &dwBytesRead, NULL);
- p0=&buff[0];
- p1=strchr(p0,'&');
- p2=&buff[dwBytesRead-1];
- dwPos=SetFilePointer(hTempFile,0,(LPLONG)NULL,FILE_BEGIN);
- while (p1!=NULL && p1<p2)
- {
- n=(DWORD)(p1-p0);
- WriteFile(hTempFile,(LPSTR)p0,n,&dwBytesWritten,NULL);
- p0=p1;
- for (i=0;i<nConv;i++)
- if (strncmp(Conv[i].entity,p0,Conv[i].ncen)==0)
- {
- WriteFile(hTempFile,Conv[i].string,Conv[i].ncst,&dwBytesWritten,NULL);
- p0+=Conv[i].ncen;
- goto nextamp;
- }
- WriteFile(hTempFile,p0,1,&dwBytesWritten,NULL);
- p0++;
- nextamp:
- p1=strchr(p0,'&');
- }
- n=(DWORD)(p2-p0)+1;
- WriteFile(hTempFile,(LPSTR)p0,n,&dwBytesWritten,NULL);
- free(buff);
- CloseHandle(hSFile);
- CloseHandle(hTempFile);
-
- CopyFile(TempName,TFileName,FALSE);
- DeleteFile(TempName);
-
- hTFile= CreateFile(TFileName,GENERIC_WRITE,0,
- (LPSECURITY_ATTRIBUTES) NULL,OPEN_EXISTING,
- FILE_ATTRIBUTE_NORMAL,(HANDLE)NULL);
- if (hTFile==INVALID_HANDLE_VALUE)
- return;
- if (TimeMode==current)
- SetFileTime(hTFile,NULL,NULL,&CurrentTime);
- else
- SetFileTime(hTFile,NULL,NULL,&stime);
- CloseHandle(hTFile);
- printf("\n Converted target file: %s", TFileName);
- return;
- }
-
-