|
Reasons |
|
Well, if you believe in the sheer beauty of
C++ class libraries don't read any further. You'll find the following material shocking.
If you own a PII SMP system with 512 MB memory and expect that all users of your software
will use the same type of computer you may be tempted to ignore the following material,
too. If, however, you do need small applications that do keep resource usage low and thus
run on small and older systems as well as heavily loaded systems at reasonable speed
you're in the right location. |
|
Standard Code |
|
Let's have a look at the following source
code that does print out the command parameters and the environment of the application. |
|
#include <stdio.h>
/* Prints every command line argument on its own line, then an empty
   line, then every environment string on its own line. Returns 0. */
int main(int argc,char *argv[],char **envp)
{
  /* argument vector: argc entries */
  for(;argc;argc--,argv++)
  {
    printf("%s\n",*argv);
  }
  /* separator line */
  printf("\n");
  /* environment vector: terminated by a NULL entry */
  for(;*envp;envp++)
  {
    printf("%s\n",*envp);
  }
  return 0;
}
|
|
Doing a standard compile this application
turns out to consume from 23,552 bytes (Windows, MSVC 4.2) to 32,256 bytes (OS/2, VACPP
3). Not much? True but think of the few lines of source code and then ask yourself how
many kbytes per line of code are going to be executed. Then have a look at the resources
in terms of memory and CPU for such a small application and you'll find out that the
overhead lies in the C runtime library. |
|
Removing the Runtime Library |
|
To remove the C runtime library I'll have to
split the readers into two groups, those developing for Windows (95/NT) and those developing for OS/2. And before you continue to read remember one
vitally important thing that is common to both platforms: no function must allocate more
than 4096 bytes of stack (return address, call variables, local variables). If you do need
more memory you'll have to dynamically (de)allocate it or use global variables (not
recommended). To give you an idea as how typical C runtime library functions can be
replaced I'll show you some replacement functions at the end of this article. |
|
Windows
95 and Windows NT |
|
So, you Windows developers, let's write the application given above in a slightly different way: |
|
/*********************************************/
/* headers */
/*********************************************/
#include <windows.h>
/*********************************************/
/* type definitions */
/*********************************************/
/* thread local storage structure: one instance per thread, its
   address is stored in the thread's slot of tlsindex */
typedef struct
{
HANDLE heap; /* handle to the thread's private heap (HeapCreate) */
char *token; /* scan position kept between stringtok() calls */
}
TLSANCHOR;
/*********************************************/
/* global variables */
/*********************************************/
/* thread local storage index shared by all threads; assigned from
   TlsAlloc() in startup(), 0xFFFFFFFF is the "not allocated" value */
DWORD tlsindex=0xFFFFFFFF;/*local storage idx*/
/*********************************************/
/* regular functions (sample) */
/*********************************************/
/* Writes the zero terminated string str followed by "\r\n" to the
   standard output handle. Write errors are ignored. */
void out(char *str)
{
  unsigned long len;  /* number of characters in str */
  unsigned long done; /* receives the byte count written */

  /* determine string length without the runtime library */
  for(len=0;str[len]!=0;len++);
  /* the text itself ... */
  WriteFile(GetStdHandle(STD_OUTPUT_HANDLE),
            str,len,&done,NULL);
  /* ... and the line end */
  WriteFile(GetStdHandle(STD_OUTPUT_HANDLE),
            "\r\n",2,&done,NULL);
}
/* Prints every command line argument on its own line, then an empty
   line, then every environment string on its own line. Returns 0. */
int main(int argc,char *argv[],char **envp)
{
  /* argument vector: argc entries */
  for(;argc;argc--,argv++)
  {
    out(*argv);
  }
  /* separator line */
  out("");
  /* environment vector: terminated by a NULL entry */
  for(;*envp;envp++)
  {
    out(*envp);
  }
  return 0;
}
/*********************************************/
/* startup function */
/*********************************************/
/*
 * startup - process entry point used in place of the C runtime
 * startup code.
 *
 * Allocates the thread local storage index and a private heap for the
 * main thread, builds argc/argv from a private copy of the command
 * line and envp from the environment block, calls main() and ends the
 * process with main()'s return value. If any setup step fails the
 * process ends with exit value 1. This function never returns.
 *
 * Argument splitting: arguments are separated by blanks or tabs, a
 * double quote toggles "inside quotes" where separators are kept, and
 * one pair of enclosing quotes is removed from an argument.
 */
void __cdecl startup(void)
{
  TLSANCHOR tdata; /*main thread local storage */
  int argc; /*main() argument count */
  char **argv; /*main() argument vector */
  char **envp; /*main() environment vector */
  char *cmd; /*cmd line/environment block*/
  char *ptr; /*char rover for arguments */
  int i; /*general purpose usage */
  int r=1; /*process exit value */

  /* allocate thread local storage
     index, react to errors */
  if((tlsindex=TlsAlloc())==0xFFFFFFFF)
    ExitProcess(1);
  /* preset thread local storage for
     main thread, react to errors; the index is
     already allocated here, so release it on failure */
  if(!TlsSetValue(tlsindex,&tdata))
    goto err2;
  if((tdata.heap=
      HeapCreate(HEAP_NO_SERIALIZE,0,0))==NULL)
    goto err2;
  tdata.token=NULL;
  /* copy command line, react to errors;
     i is the string length, so i+1 bytes hold the
     string including its terminator */
  cmd=GetCommandLine();
  i=0;
  while(cmd[i])i++;
  if((ptr=
      HeapAlloc(tdata.heap,HEAP_NO_SERIALIZE,i+1))
     ==NULL)goto err3;
  /* copy backwards from the terminator (index i)
     down to index 0, staying inside both buffers */
  do
  {
    ptr[i]=cmd[i];
  }
  while(i--);
  /* preset empty argument count and vector;
     the single zeroed entry is the NULL terminator */
  argc=0;
  if((argv=HeapAlloc(tdata.heap,
      HEAP_NO_SERIALIZE|HEAP_ZERO_MEMORY,
      sizeof(char *)))==NULL)
    goto err3;
  /* for all command line characters */
  while(*ptr)
  {
    /* skip leading blank characters */
    while(*ptr==' '||*ptr=='\t')ptr++;
    /* memorize start of argument,
       done if end of command line */
    if(*ptr==0)break;
    argv[argc]=ptr;
    /* skip to end of argument;
       i is 1 while inside double quotes */
    i=0;
    while(*ptr&&((*ptr!=' '&&
          *ptr!='\t')||i))
    {
      if(*ptr=='\"')
      {
        i=1-i;
      }
      ptr++;
    }
    /* remove enclosing quotes
       if there are any */
    if(argv[argc][0]=='\"')
    {
      argv[argc]+=1;
      if(*(ptr-1)=='\"')*(ptr-1)=0;
    }
    /* terminate argument and skip to
       next character if necessary */
    if(*ptr)*ptr++=0;
    /* resize vector memory; the zeroed new
       entry keeps the vector NULL terminated */
    if((argv=HeapReAlloc(tdata.heap,
        HEAP_NO_SERIALIZE|HEAP_ZERO_MEMORY,
        argv,
        (++argc+1)*sizeof(char *)))==NULL)
      goto err3;
  }
  /* get environment block and
     preset empty environment vector */
  if((cmd=(char *)GetEnvironmentStrings())
     ==NULL)goto err3;
  i=0;
  if((envp=HeapAlloc(tdata.heap,
      HEAP_NO_SERIALIZE|HEAP_ZERO_MEMORY,
      sizeof(char *)))==NULL)
    goto err4;
  /* create environment vector array; the block
     is a sequence of zero terminated strings that
     ends with an additional zero byte */
  ptr=cmd;
  while(*ptr||*(ptr+1))
  {
    /* skip string terminator if reqd */
    if(!*ptr)ptr++;
    /* skip strings starting with '=' */
    if(*ptr=='=')
    {
      while(*ptr)ptr++;
      continue;
    }
    /* store current pointer to
       environment array */
    envp[i]=ptr;
    /* skip to next string terminator */
    while(*ptr)ptr++;
    /* resize vector memory,
       react to errors */
    if((envp=HeapReAlloc(tdata.heap,
        HEAP_NO_SERIALIZE|HEAP_ZERO_MEMORY,
        envp,(++i+1)*sizeof(char *)))==NULL)
      goto err4;
  }
  /* call actual main procedure */
  r=main(argc,argv,envp);
  /* release environment block */
err4: FreeEnvironmentStrings(cmd);
  /* deallocate heap (frees argv, envp and the
     command line copy in one go) */
err3: HeapDestroy( tdata.heap );
  /* release thread local storage index */
err2: TlsFree(tlsindex);
  /* end process */
  ExitProcess(r);
}
|
|
Compiling and Linking (Windows) |
|
Compile this application with the following
command line (even if this line is multiple lines on your browser it is a single command
line) on Windows (MSVC 4.2), you'll have to replace <source>
with the file name of your source file: |
|
cl /O1 /Og /Os /Gf /Gr /GA /Gs
/MT <source> kernel32.lib
/link /STACK:4096 /HEAP:0
/NODEFAULTLIB /ENTRY:startup
/OPT:REF /MERGE:.idata=.data
/WS:aggressive /FIXED
|
|
Now here's the big surprise - only 2048
bytes! And if you look at resource consumption things are definitely for the better. |
|
Multithreading (Windows) |
|
Next question: how about threads? Well, the
code above is already prepared for multithreading. Start the
thread with something like: |
|
/* Starts mythreadfunction(mythreaddata) as a new thread. The thread
   handle is not needed afterwards, so it is closed right away; this
   does not end the thread. */
HANDLE hdl; /* thread handle */
DWORD rslt; /* receives the thread identifier */
if((hdl=CreateThread(NULL,0,
    (LPTHREAD_START_ROUTINE)mythreadfunction,
    (LPVOID)(mythreaddata),0,&rslt))
   !=NULL)
{
  /* thread is running */
  /* close the handle only if one was actually returned */
  CloseHandle(hdl);
}
|
|
Then place the following code in the main
function of each thread: |
|
/* per-thread anchor data: registered in the thread's slot of
   tlsindex and given a private heap; the thread ends with exit
   code 1 if either step fails */
TLSANCHOR tdata;
if(TlsSetValue(tlsindex,&tdata)==0)
{
  ExitThread(1);
}
tdata.heap=HeapCreate(HEAP_NO_SERIALIZE,0,0);
if(tdata.heap==NULL)
{
  ExitThread(1);
}
/* no stringtok() scan in progress yet */
tdata.token=NULL;
|
|
In subsequent functions of the thread you
then can access the members of the tdata structure with the
following construction: |
|
/* pdata: the value stored in the calling thread's slot of tlsindex,
   cast to a TLSANCHOR pointer (the address passed to TlsSetValue()
   by that thread) */
#define pdata \
((TLSANCHOR *)TlsGetValue(tlsindex))
|
|
pdata is then a
pointer to the tdata structure which is unique for every thread.
And what's the token member of tdata
for? Have a look at the manual pages of the strtok() function
and you'll get the drift. So let's call our replacement for this function stringtok()
and it looks as follows: |
|
char *stringtok(char *string,
char *list)
{
char *mem;
char *rover;
char **token=&(((TLSANCHOR *)
TlsGetValue(tlsindex))->token);
if(string)*token=string;
mem=*token;
while(**token)
{
for(rover=list;*rover;rover++)
{
if(**token==*rover)
{
mem++;
break;
}
}
if(!*rover)break;
(*token)++;
}
if(!*mem)return NULL;
while(**token)
{
for(rover=list;*rover;rover++)
{
if(**token==*rover)
{
*(*token)++=0;
return mem;
}
}
(*token)++;
}
return mem;
}
|
|
You see now how run time library functions
can be completely replaced. You could even use inline assembler (nah - the bad word?!?) to
improve size and speed. |
|
|
|
|
OS/2 |
|
Welcome OS/2 developers. First of all you
need an object file, exeentry.obj. It is not hard to come by,
you have to extract it from a compiler library, e.g. cppom30.lib,
by using the ilib tool with the command *EXEENTRY.
Then we're going to write the given
application in a different way: |
|
/*********************************************/
/* headers */
/*********************************************/
#define INCL_DOS
#include <os2.h>
/*********************************************/
/* type definitions */
/*********************************************/
/* thread local storage structure: one instance per thread, its
   address is stored in the thread local memory slot *tlsindex */
typedef struct
{
char *token; /* scan position kept between stringtok() calls */
}
TLSANCHOR;
/*********************************************/
/* global variables */
/*********************************************/
/* address of the thread local memory slot obtained from
   DosAllocThreadLocalMemory() in _exestart(); each thread stores
   the address of its own TLSANCHOR in *tlsindex */
TLSANCHOR **tlsindex=NULL;/*thread local mem */
/*********************************************/
/* regular functions (sample) */
/*********************************************/
/* Writes the zero terminated string str followed by "\r\n" to file
   handle 1. Write errors are ignored. */
void out(char *str)
{
  unsigned long len=0; /* number of characters in str */
  unsigned long done;  /* receives the byte count written */

  /* determine string length without the runtime library */
  while(str[len]!=0)
  {
    len++;
  }
  /* the text itself, then the line end */
  DosWrite((HFILE)1,str,len,&done);
  DosWrite((HFILE)1,"\r\n",2,&done);
}
/* Prints every command line argument on its own line, then an empty
   line, then every environment string on its own line. Returns 0. */
int main(int argc,char *argv[],char **envp)
{
  /* argument vector: argc entries */
  for(;argc;argc--,argv++)
  {
    out(*argv);
  }
  /* separator line */
  out("");
  /* environment vector: terminated by a NULL entry */
  for(;*envp;envp++)
  {
    out(*envp);
  }
  return 0;
}
/*********************************************/
/* startup function */
/*********************************************/
/*
 * _exestart - process entry point used in place of the C runtime
 * startup code.
 *
 * Allocates one slot of thread local memory for the main thread,
 * builds argc/argv from a private copy of the command line and envp
 * from the environment block, calls main() and ends the process with
 * main()'s return value. If a setup step fails the process ends with
 * exit value 1. This function never returns.
 *
 * Argument page layout: the first 256 bytes of the 4096 byte block
 * hold the pointer vector, the rest holds the copied strings.
 * NOTE(review): the command line copy is not length checked; it is
 * assumed to fit into the remaining 3840 bytes - confirm.
 */
void _System _exestart(void)
{
  TLSANCHOR tdata;/* main thread local storage */
  TIB *tib; /* thread information unused */
  PIB *pib; /* process information */
  int i; /* 1 while inside double quotes */
  char *ptr; /* general purpose pointer */
  char *base; /* write position for string copies */
  int cnt; /* general purpose usage */
  int argc; /* argc argument for main() */
  char **argv=NULL; /* argv argument for main(),
                       stays NULL without command line */
  char **envp; /* envp argument for main() */
  int r=1; /* process return value */

  /* allocate one chunk of thread local memory,
     terminate in case of error */
  if(DosAllocThreadLocalMemory(1,
      (ULONG **)&tlsindex))
    goto err1;
  /* preset thread local memory */
  *tlsindex=&tdata;
  tdata.token=NULL;
  /* get process information */
  DosGetInfoBlocks(&tib,&pib);
  /* preset empty argument count and vector; cnt is the
     number of pointer slots that fit into the 256 byte
     vector area, not the byte size of that area */
  cnt=(int)(256/sizeof(char *));
  argc=0;
  /* if a command line is present */
  if(pib->pib_pchcmd)
  {
    /* allocate argument vector memory,
       react to errors */
    if(DosAllocMem((void **)&argv,4096,
        PAG_COMMIT|PAG_READ|PAG_WRITE))
      goto err2;
    /* strings start right behind the vector area */
    base=(char *)&argv[cnt];
    /* preset argv[0]: first string of the command
       line block, copied including its terminator */
    ptr=pib->pib_pchcmd;
    argv[argc++]=base;
    do
    {
      *base++=*ptr;
    }
    while(*ptr++);
    /* memorize base, copy remaining
       command line, recall base */
    argv[argc]=base;
    do
    {
      *base++=*ptr;
    }
    while(*ptr++);
    ptr=argv[argc];
    /* for all command line characters; one slot is
       kept free for the terminating NULL entry */
    while(*ptr&&argc<cnt-1)
    {
      /* skip leading blank characters */
      while(*ptr==' '||*ptr=='\t')ptr++;
      /* memorize start of argument,
         done if end of command line */
      if(*ptr==0)break;
      argv[argc]=ptr;
      /* skip to end of argument */
      i=0;
      while(*ptr&&((*ptr!=' '&&*ptr!='\t')
            ||i))
      {
        if(*ptr=='\"')
        {
          i=1-i;
        }
        ptr++;
      }
      /* remove enclosing quotes
         if there are any */
      if(argv[argc][0]=='\"')
      {
        argv[argc]+=1;
        if(*(ptr-1)=='\"')*(ptr-1)=0;
      }
      /* terminate argument and skip to
         next character if necessary */
      if(*ptr)*ptr++=0;
      /* adjust argument count */
      argc++;
    }
    /* terminate the vector; this also clears the
       provisional entry set before the loop */
    argv[argc]=NULL;
  }
  /* count amount of environment
     strings including terminator */
  cnt=0;
  ptr=pib->pib_pchenv;
  do
  {
    while(*ptr++);
    cnt++;
  }
  while(*ptr);
  cnt++;
  /* allocate memory for environment
     array (size rounded up to a multiple of
     4096 bytes), exit in case of error */
  if(DosAllocMem((void **)&envp,
      ((cnt*sizeof(char *))+4095)&~4095UL,
      PAG_COMMIT|PAG_READ|PAG_WRITE))
    goto err3;
  /* preset environment array */
  cnt=0;
  ptr=pib->pib_pchenv;
  do
  {
    envp[cnt++]=ptr;
    while(*ptr++);
  }
  while(*ptr);
  envp[cnt]=NULL;
  /* call actual main procedure */
  r=main(argc,argv,envp);
  /* release allocated memory */
  DosFreeMem(envp);
  /* argv is only allocated if a command line was present */
err3: if(argv)DosFreeMem(argv);
  /* release thread local mem */
err2: DosFreeThreadLocalMemory(
      (ULONG *)tlsindex);
  /* exit process */
err1: DosExit(EXIT_PROCESS,r);
}
|
|
Compiling and Linking (OS/2) |
|
Compile this application with the following
command line (even if this line is multiple lines on your browser it is a single command
line) on OS/2 (VACPP 3), you'll have to replace <source>
with the file name of your source file: |
|
icc /W1 /Gn+ /Gm+ /Gd- /Gi+ /O+
/Oc+ /B"/SE:14 /ST:8192
/OPTF /PACKC /PACKD /NOLI /NOIN"
<source> exeentry.obj os2386.lib
|
|
Now here's the big surprise - only 2048
bytes! And if you look at resource consumption things are definitely for the better. |
|
Multithreading (OS/2) |
|
Next question: how about threads? Well, the
code above is already prepared for multithreading. Start the
thread with something like: |
|
/* Starts mythreadfunction(mythreaddata) as a new thread with a
   fully committed stack of 8192 bytes; DosCreateThread() returning
   0 means the thread was created */
if(DosCreateThread(&tid,
    (void(* _System)(unsigned long))mythreadfunction,
    mythreaddata,
    STACK_COMMITTED,
    8192)==0)
{
  /* thread is running */
}
|
|
Then place the following code in the main
function of each thread: |
|
/* per-thread anchor data: initialised first, then its address is
   stored in this thread's local memory slot */
TLSANCHOR tdata;
tdata.token=NULL;
*tlsindex=&tdata;
|
|
In subsequent functions of the thread you
then can access the members of the tdata structure with the
following construction: |
|
/* pdata: the TLSANCHOR pointer stored in the thread local memory
   slot that tlsindex points to */
#define pdata (*tlsindex)
|
|
pdata is then a
pointer to the tdata structure which is unique for every thread.
And what's the token member of tdata
for? Have a look at the manual pages of the strtok() function
and you'll get the drift. So let's call our replacement for this function stringtok()
and it looks as follows: |
|
/*
 * stringtok - strtok() replacement keeping its scan position in the
 * calling thread's TLSANCHOR.
 *
 * string: text to split (it is modified), or NULL to continue with
 *         the text of the previous call of this thread
 * list:   zero terminated set of delimiter characters
 *
 * Returns the next token, or NULL when only delimiters or nothing
 * is left. The token member starts out as NULL, so the first call
 * of a thread has to pass a non-NULL string.
 */
char *stringtok(char *string,
char *list)
{
  char *mem;   /* start of the token to return */
  char *rover; /* rover through the delimiter list */
  char **token=&((*tlsindex)->token);

  /* a new string restarts the scan */
  if(string)*token=string;
  mem=*token;
  /* skip leading delimiters; mem and *token advance together */
  while(**token)
  {
    for(rover=list;*rover;rover++)
    {
      if(**token==*rover)
      {
        mem++;
        break;
      }
    }
    /* current character is no delimiter: token starts here */
    if(!*rover)break;
    (*token)++;
  }
  /* nothing but delimiters left */
  if(!*mem)return NULL;
  /* find the end of the token */
  while(**token)
  {
    for(rover=list;*rover;rover++)
    {
      if(**token==*rover)
      {
        /* cut the string at the delimiter and remember
           the character behind it for the next call */
        *(*token)++=0;
        return mem;
      }
    }
    (*token)++;
  }
  /* last token: it ends at the string terminator */
  return mem;
}
|
|
You see now how run time library functions
can be completely replaced. A word of warning, though. If you want to allocate and release
memory dynamically you'll have to write wrapper functions for DosAllocMem()
and DosFreeMem() that do proper large/small block
(de)allocation, otherwise you'll end up requesting one byte and getting 4096 bytes
allocated instead. If you write these routines assert that the memory management variables
required are members of tdata to be thread safe. |
|
Runtime Library Replacement Functions |
|
The following run time library replacement
functions should speak for themselves. Just replace string with str
and memory with mem and you do have
the names of the run time library functions the following ones do replace. Don't forget
the Windows and OS/2 replacement for the strtok()
routine given above. |
|
/* Copies the zero terminated string src, including its terminator,
   to dst. dst has to be large enough; nothing is returned. */
void stringcpy(char *dst,char *src)
{
  /* the copied character doubles as loop condition, so the loop
     ends right after the terminator has been stored */
  while((*dst++=*src++)!=0);
}
/* Appends the zero terminated string src, including its terminator,
   to the end of the string in dst. dst has to be large enough. */
void stringcat(char *dst,char *src)
{
  char *end;

  /* locate the terminator of dst */
  for(end=dst;*end;end++);
  /* copy src there; the terminator is copied last */
  do
  {
    *end++=*src;
  }
  while(*src++);
}
/* Returns the number of characters in str, not counting the
   terminator. */
int stringlen(char *str)
{
  char *end;

  /* walk to the terminator, the distance is the length */
  for(end=str;*end;end++);
  return (int)(end-str);
}
/* Returns a pointer to the first occurrence of c in str, or NULL if
   c does not occur. The terminator counts as part of the string, so
   searching for 0 returns a pointer to the terminator. */
char *stringchr(char *str,char c)
{
  for(;;str++)
  {
    /* match is tested first so that c==0 finds the terminator */
    if(*str==c)return str;
    if(!*str)return NULL;
  }
}
/* Returns a pointer to the last occurrence of c in str, or NULL if
   c does not occur. The terminator counts as part of the string, so
   searching for 0 returns a pointer to the terminator. */
char *stringrchr(char *str,char c)
{
  char *scan=str;

  /* start at the terminator ... */
  while(*scan)scan++;
  /* ... and walk back to the first character */
  for(;;scan--)
  {
    if(*scan==c)return scan;
    if(scan==str)return NULL;
  }
}
/* Compares the zero terminated strings s1 and s2.
   Returns 0 if they are equal, a negative value if s1 sorts before
   s2 and a positive value if s1 sorts after s2. Characters are
   compared as unsigned char, as strcmp() does. */
int stringcmp(char *s1,char *s2)
{
  const unsigned char *a=(const unsigned char *)s1;
  const unsigned char *b=(const unsigned char *)s2;

  /* advance while both strings agree and have not ended; no
     pointer is ever moved in front of the string start */
  while(*a&&*a==*b)
  {
    a++;
    b++;
  }
  return (int)*a-(int)*b;
}
/* Compares at most n characters of the zero terminated strings s1
   and s2. Returns 0 if the compared parts are equal (always for
   n==0), a negative value if s1 sorts before s2 and a positive value
   if s1 sorts after s2. Characters are compared as unsigned char, as
   strncmp() does. A negative n does not limit the comparison. */
int stringncmp(char *s1,char *s2,
int n)
{
  const unsigned char *a=(const unsigned char *)s1;
  const unsigned char *b=(const unsigned char *)s2;

  if(n)
  {
    for(;;a++,b++)
    {
      if(*a!=*b)return (int)*a-(int)*b;
      /* equal so far: stop at the end of the strings or
         when the character budget is used up */
      if(!*a||!--n)break;
    }
  }
  return 0;
}
/* Copies n bytes from _src to _dst, lowest address first. The areas
   should not overlap. Nothing is copied if n is zero or negative. */
void memorycpy(void *_dst,
void *_src,int n)
{
  char *src=(char *)_src;
  char *dst=(char *)_dst;

  /* n>0 instead of n!=0: a negative count must not start a
     runaway copy */
  for(;n>0;n--)
  {
    *dst++=*src++;
  }
}
/* Sets n bytes starting at _dst to val. Nothing is written if n is
   zero or negative. */
void memoryset(void *_dst,
char val,int n)
{
  char *dst=(char *)_dst;

  /* n>0 instead of n!=0: a negative count must not start a
     runaway fill */
  for(;n>0;n--)
  {
    *dst++=val;
  }
}
|
|
|