home *** CD-ROM | disk | FTP | other *** search
- From: MX%"BJ06@C53000.PETROBRAS.ANRJ.BR" 29-SEP-1994 16:43:48.79
-
- To: MX%"noel@erich.triumf.ca"
-
- CC:
-
- Subj: FD3 for DOS (FD3DOS.CPP) - Source code
-
-
-
- Return-Path: <BJ06@C53000.PETROBRAS.ANRJ.BR>
-
- Received: from fpsp.fapesp.br by Erich.Triumf.CA (MX V4.0-1 VAX) with SMTP;
-
- Thu, 29 Sep 1994 16:42:40 PST
-
- Received: from DECNET-MAIL by fpsp.fapesp.br with PMDF#10108; Thu, 29 Sep 1994
-
- 09:26 BSC (-0300 C)
-
- Date: Thu, 29 Sep 1994 09:26 BSC (-0300 C)
-
- From: "FAUSTO Arinos de Almeida Barbuto (Totxo)"
-
- <BJ06@C53000.PETROBRAS.ANRJ.BR>
-
- Subject: FD3 for DOS (FD3DOS.CPP) - Source code
-
- To: noel@erich.triumf.ca
-
- Message-ID: <04B80DB3A0009621@fpsp.fapesp.br>
-
- X-Envelope-to: noel@erich.triumf.ca
-
- X-VMS-To: @NOEL
-
- References: ANSP network HEPnet SPAN Bitnet Internet gateway
-
- Comments: @FPSP.FAPESP.BR - @FPSP.HEPNET - @BRFAPESP.BITNET - .BR gateway
-
-
-
- /* BEGIN NOTICE
-
-
-
- Copyright (c) 1992 by John Sarraille and Peter DiFalco
-
- (john@ishi.csustan.edu)
-
-
-
- Permission to use, copy, modify, and distribute this software
-
- and its documentation for any purpose and without fee is hereby
-
- granted, provided that the above copyright notice appear in all
-
- copies and that both that copyright notice and this permission
-
- notice appear in supporting documentation.
-
-
-
- The algorithm used in this program was inspired by the paper
-
- entitled "A Fast Algorithm To Determine Fractal Dimensions By
-
- Box Counting", which was written by Liebovitch and Toth, and
-
- which appeared in the journal "Physics Letters A", volume 141,
-
- pp 386-390, (1989).
-
-
-
- This program is not warranteed: use at your own risk.
-
-
-
- -------------------------------------------------------
-
- DOS Version created by Fausto Arinos de Almeida Barbuto
-
- (BJ06@C53000.PETROBRAS.ANRJ.BR), September 23, 1994
-
- (Rio de Janeiro, Federal Republic of BRAZIL)
-
- -------------------------------------------------------
-
-
-
- END NOTICE */
-
-
-
- #include <stdio.h>
-
- #include <stdlib.h>
-
- #include "fd.h"
-
- #include <math.h>
-
-
-
- int empty_Av(long int);
-
- void push_Av(long int *, long int);
-
- void pop_Av(long int *, long int*);
-
- void create_Av(long int *);
-
- void print_Av(long int);
-
-
-
- int get_e_dim(char *);
-
- void max_min(char*, double *, double *);
-
- void rad_pass(int, int);
-
- void radixsort(void);
-
- void sweep(ulong[], double[], double[], double[]);
-
- float fracdim();
-
-
-
- int empty_Q(QHeader);
-
- void en_Q(QHeader *, long int);
-
- void de_Q(QHeader *, long int *);
-
- void create_Q(QHeader *);
-
- void transf_Q(QHeader *, QHeader *);
-
- void print_Q(QHeader *);
-
- void GetDims(double[], double[], double[], ulong, double*, double*, double*);
-
- void findMark(ulong *, ulong[]);
-
- void fitLSqrLine (long, long, double[], double[], double *, double *);
-
-
-
- int empty_Av(long int avail)
-
- // long int avail;
-
- {
-
- return (avail == -1);
-
- }
-
-
-
- /*
-
- Puts the info pointed to by "pointer" into the avail list. Use
-
- this after dequeuing an element if you want to recycle it. Note the
-
- need to access the global array "next_after", which holds all the
-
- pointers.
-
- */
-
- void push_Av(long int *p_avail, long int pointer)
-
- // long int *p_avail, pointer ;
-
- {
-
- extern long int *next_after; /* array for pointers to data. */
-
- next_after[pointer] = *p_avail;
-
- *p_avail = pointer;
-
- }
-
-
-
- /*
-
- Pops the info pointed to by "p_pointer" from the avail list. Use
-
- this if you need an element to place on a queue.
-
- */
-
- void pop_Av(long int *p_avail, long int *p_pointer)
-
- // long int *p_avail, *p_pointer;
-
- {
-
- extern long int *next_after; /* array for pointers to data. */
-
- *p_pointer = *p_avail;
-
- *p_avail = next_after[*p_avail];
-
- }
-
-
-
- /*
-
- Creates an avail list with room for all the data, plus two more
-
- elements to be used as markers.
-
- */
-
- void create_Av(long int *p_avail)
-
- // long int *p_avail;
-
- {
-
- extern long int *next_after; /* array for pointers to data. */
-
- extern ulong dataLines; /* number of data points in input file */
-
- unsigned long int i;
-
- for (i=0;i<dataLines+1;i++)
-
- next_after[i] = i+1;
-
- next_after[dataLines+1] = -1;
-
- *p_avail = 0;
-
- }
-
-
-
- /*
-
- This may prove to be useful when debugging.
-
- */
-
- void print_Av(long int avail)
-
- // long int avail;
-
- {
-
- extern long int *next_after; /* array for pointers to data. */
-
- extern int *marker; /* To mark queues during radix sort */
-
- extern int embed_dim; /* How many coordinates points have. */
-
- extern ulong **data; /* array of data points */
-
- long int temp;
-
- int coord;
-
- printf("Starting to print the avail list:\n\n");
-
- printf("Index\tNextField\tMarker?\t\tCoords\n\n");
-
- temp = avail;
-
- while (temp != -1)
-
- {
-
- printf("%ld\t%ld\t\t%d\t", temp, next_after[temp], marker[temp]);
-
- for (coord=0;coord<embed_dim;coord++)
-
- printf("\t%lu", data[coord][temp]);
-
- printf("\n");
-
- temp = next_after[temp];
-
- }
-
- printf("End of printing of avail list.\n");
-
- }
-
-
-
-
-
- int empty_Q(QHeader Q)
-
- // QHeader Q; /* The queue to be tested. */
-
- {
-
- return(Q.Qrear == -1);
-
- }
-
-
-
- /*
-
- This function enqueues the info at "pointer" into "*p_QHeader". It does
-
- NOT remove the info from the avail list, NOR does it check to see if the
-
- queue is full. Use with caution. BEFORE calling this function, you
-
- should free the info from the avail list and be sure somehow that
-
- "*p_QHeader" is not full. Note also that the function needs to access
-
- the array called "next_after" globally.
-
- */
-
-
-
- void en_Q(QHeader *p_QHeader, long int pointer)
-
- // QHeader *p_QHeader; /* Queue to enqueue. */
-
- // long int pointer; /* array index of info to go into the queue. */
-
- {
-
- extern long int *next_after; /* array for pointers to data. */
-
- next_after[pointer] = -1;
-
- if (empty_Q(*p_QHeader))
-
- p_QHeader->Qfront = pointer;
-
- else
-
- next_after[p_QHeader->Qrear] = pointer;
-
- p_QHeader->Qrear = pointer;
-
- }
-
-
-
- /*
-
- This Dequeuing function does NOT check to see if the queue is
-
- empty first, nor does it return the dequeued info to the avail
-
- list. The caller MUST take care of these things! Note also
-
- that the function needs to access the array called "next_after"
-
- globally.
-
- */
-
- void de_Q(QHeader *p_QHeader,long int *p_pointer)
-
- // QHeader *p_QHeader; /* Queue to be dequeued. */
-
- // long int *p_pointer; /* Pointer to the data to be returned */
-
- /* to the calling function. */
-
- {
-
- extern long int *next_after; /* array for pointers to data. */
-
- *p_pointer = p_QHeader->Qfront;
-
- if ((p_QHeader->Qfront) == (p_QHeader->Qrear))
-
- p_QHeader->Qrear = -1 ;
-
- p_QHeader->Qfront = next_after[p_QHeader->Qfront];
-
- }
-
-
-
- void create_Q (QHeader *p_QHeader)
-
- // QHeader *p_QHeader; /* Queue to be created. */
-
- {
-
- p_QHeader->Qfront = -1;
-
- p_QHeader->Qrear = -1;
-
- }
-
-
-
- /*
-
- This is a function to rapidly move an "element" from the source
-
- to the target queue. It bypasses the actions of placing the info
-
- into the avail list, and taking it back out again. It also
-
- avoids testing to see if the source queue is being emptied, and
-
- testing to see if the target queue is empty before the enqueue
-
- is done.
-
-
-
- Such a function is useful because there will be a great many of
-
- these operations done in the radix sort of a large set. For
-
- further time-savings, one should think about putting this code
-
- in-line, so that the overhead of function-calling can be saved.
-
-
-
- USE with EXTREME CAUTION. The effect of the function will be
-
- INCORRECT if the source has one element left, or the target is
-
- empty!
-
-
-
- Note the reliance on the global array "next_after".
-
- */
-
- void transf_Q(QHeader *p_Q1, QHeader *p_Q2)
-
- // QHeader *p_Q1, *p_Q2; /* source and target queues */
-
- {
-
- extern long int *next_after; /* array for pointers to data. */
-
- long int temp;
-
- temp = p_Q1->Qfront; /* save front of Q1 */
-
- p_Q1->Qfront = next_after[p_Q1->Qfront]; /* fast dequeue */
-
- next_after[temp] = -1; /* fast enqueue */
-
- next_after[p_Q2->Qrear] = temp; /* fast enqueue */
-
- p_Q2->Qrear = temp; /* fast enqueue */
-
- }
-
-
-
- /*
-
- This may come in handy when de-bugging. Note the reliance on
-
- the global arrays "next_after" and "data".
-
- */
-
- void print_Q(QHeader *p_Q)
-
- // QHeader *p_Q;
-
- {
-
- extern long int *next_after; /* array for pointers to data. */
-
- extern int *marker; /* To mark queues during radix sort */
-
- extern int embed_dim; /* How many coordinates points have. */
-
- extern ulong **data; /* array of data points */
-
- long int temp;
-
- int coord;
-
- printf("Starting to print the queue:\n\n");
-
- printf("Index\tNextField\tMarker?\t\tCoords\n\n");
-
- temp = p_Q->Qfront;
-
- while (temp != -1)
-
- {
-
- printf("%ld\t%ld\t\t%d\t", temp, next_after[temp], marker[temp]);
-
- for (coord=0;coord<embed_dim;coord++)
-
- printf("\t%lu", data[coord][temp]);
-
- printf("\n");
-
- temp = next_after[temp];
-
- }
-
- printf("End of printing of queue.\n");
-
- }
-
-
-
- /* BEGIN NOTICE
-
-
-
- Copyright (c) 1992 by John Sarraille and Peter DiFalco
-
- (john@ishi.csustan.edu)
-
-
-
- Permission to use, copy, modify, and distribute this software
-
- and its documentation for any purpose and without fee is hereby
-
- granted, provided that the above copyright notice appear in all
-
- copies and that both that copyright notice and this permission
-
- notice appear in supporting documentation.
-
-
-
- The algorithm used in this program was inspired by the paper
-
- entitled "A Fast Algorithm To Determine Fractal Dimensions By
-
- Box Counting", which was written by Liebovitch and Toth, and
-
- which appeared in the journal "Physics Letters A", volume 141,
-
- pp 386-390, (1989).
-
-
-
- This program is not warranteed: use at your own risk.
-
-
-
- END NOTICE */
-
-
-
- /* ################################################################## */
-
- /* ################################################################## */
-
- int get_e_dim(char *userinputfile)
-
- // char *userinputfile;
-
- {
-
- extern FILE *infile;
-
- int c, count ; double temp ;
-
- if (debugging || checking) printf("Now inside get_e_dim.\n");
-
- if (debugging || checking) printf("About to open input file.\n");
-
- if((infile = fopen(userinputfile,"r")) == NULL) /* open input file */
-
- {
-
- printf("Cannot open input file, %s\n",userinputfile);
-
- exit(-1);
-
- }
-
- /* get past "dataLines", and the first number on the first line of
-
- actual data. */
-
- fscanf (infile, "%f%f", &temp, &temp);
-
- count = 1 ; /* One number on this line, so far. */
-
- do /* Read and count the rest of the numbers on this line. */
-
- {
-
- do c=getc(infile) ; /* Skip over same-line white space */
-
- while ( (c=='\t') || (c==' ') ) ;
-
- if ( (c != '\n') && (c != '\r') && (c !='\f') )
-
- { /* if we don't seem to be on a new line */
-
- count = count + 1 ; /* then increment # of numbers on this line */
-
- do c=getc(infile) ; /* and read past that next number */
-
- while ( (c != '\n') && (c != '\r') && (c !='\f')
-
- && (c !='\t') && (c != ' ') );
-
- }
-
- } while ( (c != '\n') && (c != '\r') && (c !='\f') ) ;
-
- fclose (infile);
-
- return (count);
-
- }
-
- /* ################################################################## */
-
- /* ################################################################## */
-
- void max_min(char *userinputfile, double *pmax, double *pmin)
-
- // char *userinputfile;
-
- // double *pmax, *pmin ;
-
- {
-
- extern int embed_dim; /* How many coordinates points have. */
-
- FILE *infile; /* input file */
-
- double temp;
-
- ulong i, numToRead; /* control variable */
-
- ulong dataLines; /* number of data points in input file */
-
- /* open input file */
-
- if((infile = fopen(userinputfile,"r")) == NULL)
-
- {
-
- printf("Cannot open input file, %s\n",userinputfile);
-
- exit(-1);
-
- }
-
- /* get dataLines */
-
- if ((fscanf(infile,"%lu",&dataLines)) == 0)
-
- {
-
- printf ("Format of input file, %s, ", userinputfile);
-
- printf ("is incorrect -- please check.\n");
-
- exit(-1);
-
- }
-
- if (debugging || checking)
-
- { printf ("Now control is in max_min.\n");
-
- printf ("Number of data lines is %lu ...\n", dataLines);
-
- printf ("Starting to read data.\n");
-
- }
-
- /* find maximum and minimum data values in the input file.
-
- These are needed so that the data can be scaled to be a set
-
- of non-negative integers between 0 and maxdiam, where
-
- maxdiam will be the largest integer value expressible as an
-
- element of the data type "unsigned long int". */
-
-
-
- fscanf(infile,"%lf",&temp);
-
- if (debugging || checking) printf ("First datum is %lf.\n", temp);
-
- *pmin=*pmax=temp;
-
- if (debugging)
-
- printf ("Pmin is now: %lf, Pmax is now: %lf\n", *pmin, *pmax);
-
- numToRead = ((ulong) (embed_dim)) * dataLines;
-
- if (debugging)
-
- printf ("Total number of coordinates to read is: %lu\n", numToRead);
-
- for(i=1;i<numToRead;i++)
-
- {
-
- fscanf(infile,"%lf",&temp);
-
- if (debugging) printf ("Next datum read is: %lf\n", temp);
-
- if(temp > *pmax)
-
- *pmax = temp;
-
- else
-
- if(temp < *pmin)
-
- *pmin = temp;
-
- if (debugging) printf ("Pmin is now: %lf, Pmax is now: %lf\n", *pmin, *pmax);
-
- }
-
- /* check to see if the maximum equals the minimum -- this is a
-
- degenerate case -- or it might mean that the input file is
-
- } faulty. */
-
- if(*pmax == *pmin)
-
- {
-
- printf ("The input file, %s, is confusing!\n\n", userinputfile);
-
- printf ("Either all the points in %s have the same ", userinputfile);
-
- printf ("coordinates,\n");
-
- printf ("(THE FRACTAL DIMENSION IS ZERO IN THIS CASE.)\n\n");
-
- printf ("or %s is simply of the wrong form for an\n",userinputfile);
-
- printf ("input file to this program -- please check.\n");
-
- exit(-1);
-
- }
-
- /* close the input file */
-
- fclose(infile);
-
- }
-
- /* ################################################################## */
-
- /* ################################################################## */
-
- /*
-
- This procedure performs one pass of the radix sort. It assumes that the
-
- queues each have a marker in front at the time of the call, and this is
-
- the condition the procedure LEAVES the queues in when it terminates.
-
- */
-
- void rad_pass(int coord, int bitpos)
-
- // int coord, /* The coordinate we are doing this pass on */
-
- // bitpos; /* The bit position we are doing this pass on */
-
- { extern int *marker; /* To mark queues during radix sort */
-
- extern QHeader Q[2]; /* array of two queues for the radix sort */
-
- extern ulong **data; /* initial array of data points */
-
- int queue_num;
-
- long int index ;
-
- /* Move the markers to the rear of the queues */
-
- if (debugging)
-
- printf ("Starting pass on bit %d of coord %d.\n",bitpos,coord);
-
- if (debugging) printf ("Moving markers to the rear of queues.\n");
-
- for (queue_num=0;queue_num<2;queue_num++) {
-
- de_Q(&(Q[queue_num]),&index);
-
- en_Q(&(Q[queue_num]),index);
-
- }
-
- if (debugging) print_Q(&Q[0]);
-
- if (debugging) print_Q(&Q[1]);
-
-
-
- /* Move all non-marker elements to the appropriate queue. */
-
- if (debugging) printf ("Starting main part of pass.\n");
-
- for (queue_num=0;queue_num<2;queue_num++)
-
- { /* Peek first to see if Qfront is a marker -- this violates
-
- information hiding, and would be "cleaner" if done with a
-
- procedure in the queue package. */
-
- while ( marker[Q[queue_num].Qfront] == 0 )
-
- { /* Peeking again! */
-
- if (IS_A_ONE(data[coord][Q[queue_num].Qfront],bitpos))
-
- /* Directly transfer from the source to target queue */
-
- transf_Q( &(Q[queue_num]),&(Q[1]) );
-
- else transf_Q( &(Q[queue_num]),&(Q[0]) ) ;
-
- if (debugging) printf("A queue transfer is done. \n");
-
- if (debugging) print_Q(&Q[0]);
-
- if (debugging) print_Q(&Q[1]);
-
- } } }
-
-
-
- /* ################################################################## */
-
- /* ################################################################## */
-
- /*
-
- THIS SORT IS TO BE USED DIRECTLY AFTER FDDRIVER DOES THE INITIAL LOADING OF
-
- THE DATA INTO THE QUEUES. IT LEAVES THE DATA ESSENTIALLY SORTED, WITH ALL
-
- THE DATA WHOSE X-COORD STARTS WITH 0 IN Q[0], IN ORDER, AND ALL THE DATA
-
- WHOSE X-COORD STARTS WITH 1 IN Q[1], IN ORDER. THUS THE SWEEP THAT COMES
-
- NEXT MUST TRAVERSE Q[0], AND THEN Q [1].
-
- */
-
- void radixsort()
-
- {
-
- extern QHeader Q[2]; /* ARRAY OF TWO QUEUES FOR THE RADIX SORT */
-
- extern int embed_dim; /* HOW MANY COORDINATES POINTS HAVE. */
-
- extern long int avail; /* AVAIL LIST POINTER */
-
- int bitpos, coord, queue_num;
-
- long int index;
-
-
-
- /* FINISH UP ON THE ZERO-BIT -- LOADING DATA TOOK CARE OF FIRST PASS. */
-
- for (coord=embed_dim-2;coord>=0;coord--) rad_pass(coord,0);
-
-
-
- /* NOW SORT ON THE REST OF THE BITS. */
-
- for (bitpos=1;bitpos<numbits;bitpos++)
-
- for (coord=embed_dim-1;coord>=0;coord--) rad_pass(coord,bitpos);
-
-
-
- /* LASTLY, GET THE MARKERS OUT OF THE QUEUES. */
-
- for (queue_num=0;queue_num<2;queue_num++)
-
- {
-
- de_Q(&(Q[queue_num]),&index);
-
- push_Av(&avail,index);
-
- }}
-
- /* ################################################################## */
-
- /* ################################################################## */
-
- /*
-
- THIS PROCEDURE TRAVERSES THE SORTED DATA POINT LIST, AND EXTRACTS THE
-
- INFORMATION NEEDED TO COMPUTE THE CAPACITY, INFORMATION, AND CORRELATION
-
- DIMENSIONS OF THE DATA. DATA IS CORRECTED ON THE FLY DURING THE TRAVERSAL,
-
- AND THEN "MASSAGED". AFTER "SWEEP" RUNS, WE NEED ONLY FIT A SLOPE TO THE
-
- DATA TO OBTAIN THE FRACTAL DIMENSIONS.
-
- */
-
- void sweep(ulong boxCountS[], double negLogBoxCountS[], double logSumSqrFreqS[],
-
- double informationS[])
-
-
-
- // ulong boxCountS[numbits+1]; /* COUNT BOXES OF EACH SIZE */
-
-
-
- // double
-
- /* FOR EACH BOX SIZE #s, negLogBoxCountS[s] WILL EVENTUALLY BE
-
- SET TO THE NEGATIVE OF THE LOG (BASE TWO) OF THE NUMBER OF
-
- BOXES OF SIZE #s THAT ARE OCCUPIED BY ONE OR MORE DATA
-
- POINTS.
-
- */
-
-
-
- // negLogBoxCountS[numbits+1],
-
-
-
- /* FOR EACH BOX SIZE #s, logSumSqrFreqS[s] WILL EVENTUALLY BE
-
- SET TO THE LOG (BASE TWO) OF THE PROBABILITY THAT TWO
-
- RANDOMLY CHOSEN DATA POINTS ARE IN THE SAME BOX OF SIZE
-
- #s.
-
- */
-
-
-
- // logSumSqrFreqS[numbits+1],
-
-
-
- /* FOR EACH BOX SIZE #s, informationS[s] WILL EVENTUALLY BE
-
- SET TO THE INFORMATION (BASE TWO) IN THE DISTRIBUTION OF
-
- DATA POINTS IN THE BOXES OF SIZE #s.
-
- */
-
- // informationS[numbits+1] ;
-
-
-
- { extern int embed_dim; /* HOW MANY COORDINATES POINTS HAVE. */
-
- extern ulong **data, /* INITIAL ARRAY OF DATA POINTS */
-
- dataLines ;
-
- extern QHeader Q[2]; /* ARRAY OF TWO QUEUES FOR THE RADIX SORT */
-
- extern ulong *diff_test; /* XOR'S OF PAIRS OF COORDINATES */
-
- extern long int *next_after; /* ARRAY FOR POINTERS TO DATA. */
-
-
-
- int bitpos, countpos, coord, queue_num, found;
-
- ulong pointCount[numbits+1] ;
-
- long int current, previous;
-
- double sumSqrFreq[numbits+1], freq, log2=log(2.0) ;
-
-
-
- /* GET A POINTER TO THE FIRST DATA VALUE */
-
- if (Q[0].Qfront != -1) previous = Q[0].Qfront;
-
- else previous = Q[1].Qfront;
-
-
-
- /* INIT boxCountS, pointCountS, sumSqrFreq, AND informationS.*/
-
- for(countpos=0;countpos<=numbits;countpos++) {
-
- boxCountS[countpos]=1;
-
- sumSqrFreq[countpos]=0.0;
-
- pointCount[countpos]=1;
-
- informationS[countpos]=0.0;
-
- }
-
-
-
- for (queue_num=0;queue_num<2;queue_num++)
-
- { current = Q[queue_num].Qfront;
-
- while (current != -1)
-
- { found = 0 ;
-
- /* START BY LOOKING AT THE BIGGEST BOX SIZE */
-
- bitpos=numbits-1;
-
- for (coord=embed_dim-1;coord>=0;coord--)
-
- diff_test[coord] = data[coord][previous]^ data[coord][current];
-
- do {coord = embed_dim - 1;
-
- do {/* IF THE CURRENT POINT AND PREVIOUS POINTS ARE IN DIFFERENT
-
- BOXES OF THIS SIZE, */
-
- if ( IS_A_ONE(diff_test[coord],bitpos) )
-
- {/* THEN THE CURRENT POINT IS IN NEW BOXES OF ALL SMALLER
-
- SIZES TOO, AND STILL IN THE SAME BOXES OF LARGER SIZES,
-
- SO ... */
-
- for (countpos=bitpos;countpos>=0;countpos--)
-
- {/* CALCULATE FREQUENCY OF POINTS IN THE BOX, ASSUMING FOR
-
- NOW THAT THE NUMBER OF DATA LINES IN THE INPUT FILE IS
-
- THE NUMBER OF DISTINCT POINTS IN THE DATA SET. WE
-
- ADJUST THIS AT THE END OF THIS FUNCTION. */
-
- if (debugging)
-
- printf("pointCount[%d] is %d...\n", countpos, pointCount[countpos]);
-
- freq = pointCount[countpos]/(double)dataLines ;
-
- /* WE WILL ENCOUNTER NO MORE OF THE POINTS IN THE BOX
-
- WE JUST LEFT (THE SPECIAL ORDERING OF THE SORT WE
-
- USED ABOVE GUARANTEES THIS!), SO WE COMPUTE WHAT
-
- THIS BOX CONTRIBUTES TO THE RUNNING SUMS. */
-
- sumSqrFreq[countpos] += (freq * freq) ;
-
- informationS[countpos] += ( freq*log(freq)/log2 ) ;
-
- /* WE HAVE GOTTEN INTO A NEW BOX AT THIS LEVEL, SO WE
-
- REFLECT THE NEW BOX IN THE COUNT */
-
- boxCountS[countpos]++ ;
-
- /* SINCE WE HAVE A NEW BOX AT THIS LEVEL, THERE IS ONLY
-
- ONE KNOWN POINT IN IT SO FAR -- THE CURRENT POINT */
-
- pointCount[countpos]=1;
-
- }
-
- for (countpos=bitpos+1;countpos<=numbits;countpos++)
-
- /* THE CURRENT POINT IS IN THE BOXES AT THESE LEVELS, SO
-
- JUST INCREMENT THE POINT COUNTER. */
-
- pointCount[countpos]++ ;
-
- found = 1;
-
- }
-
- else coord-- ;
-
- }
-
- while ( (found == 0) && (coord > -1) );
-
- bitpos-- ;
-
- }
-
- while ( (found == 0) && (bitpos > -1) );
-
- previous = current; current = next_after[current];
-
- } }
-
-
-
- /* NOW ADD IN THE CONTRIBUTION DUE TO THE COUNTS REMAINING AFTER THE
-
- LAST POINT HAS BEEN FOUND, RENORMALIZE WITH BOXCOUNT[0], AND MASSAGE
-
- THE RAW DATA FROM THE TRAVERSAL SO THAT IS IS READY FOR THE LEAST
-
- SQUARES FITTING. */
-
-
-
- for (countpos=numbits;countpos>=0;countpos--)
-
- {
-
- negLogBoxCountS[countpos] = -log((double)boxCountS[countpos])/log(2.0);
-
-
-
- if (debugging)
-
- printf("pointCount[%d] is %d...\n", countpos, pointCount[countpos]);
-
- freq = pointCount[countpos]/(double)dataLines ;
-
-
-
- sumSqrFreq[countpos] += (freq * freq) ;
-
- sumSqrFreq[countpos] *= (dataLines/(double) boxCountS[0]) ;
-
- sumSqrFreq[countpos] *= (dataLines/(double) boxCountS[0]) ;
-
-
-
- /* sumSqrFreq[countpos] NOW CONTAINS THE SUM OF THE SQUARES OF THE
-
- FREQUENCIES OF POINTS IN ALL OCCUPIED BOXES OF THE SIZE
-
- CORRESPONDING TO countpos. */
-
-
-
- logSumSqrFreqS[countpos] = log(sumSqrFreq[countpos])/log(2.0) ;
-
- informationS[countpos] += ( freq*log(freq)/log(2.0) ) ;
-
- informationS[countpos] *= (dataLines/(double)boxCountS[0]) ;
-
- informationS[countpos] +=
-
- ( log((double)dataLines)-log((double)boxCountS[0]) ) / log(2.0) ;
-
-
-
- /* information[countpos] NOW CONTAINS THE INFORMATION SUM FOR ALL THE
-
- OCCUPIED BOXES OF THIS SIZE. */
-
-
-
- }
-
- }
-
- /* ################################################################## */
-
- /* ################################################################## */
-
- /* FIT LEAST SQUARE LINE TO DATA IN X,Y. NO PROTECTION AGAINST OVERFLOW
-
- HERE. IT IS ASSUMED THAT LAST > FIRST AND THAT THE X'S ARE NOT ALL THE
-
- SAME -- ELSE DIVISION BY ZERO WILL OCCUR. */
-
- void fitLSqrLine (long first, long last, double X[], double Y[],
-
- double *slopePtr, double *interceptPtr)
-
- // long first, last ;
-
- // double X[], Y[], *slopePtr, *interceptPtr ;
-
- {
-
- int index , pointCount ;
-
- double Xsum=0, Ysum=0, XYsum=0, XXsum=0, Xmean=0, Ymean=0,
-
- Xtemp, Ytemp;
-
- for (index=first; index<=last; index++)
-
- { Xtemp = X[index]; Ytemp = Y[index];
-
- Xsum += Xtemp; Ysum += Ytemp;
-
- XYsum += (Xtemp * Ytemp); XXsum += (Xtemp * Xtemp); }
-
- pointCount = last - first + 1 ;
-
- Xmean = Xsum/pointCount; Ymean = Ysum/pointCount;
-
- *slopePtr = (XYsum - Xsum * Ymean)/(XXsum - Xsum * Xmean) ;
-
- *interceptPtr = Ymean - *slopePtr * Xmean ;
-
- }
-
- /* ################################################################## */
-
- /* ################################################################## */
-
- /*
-
- MARK GREATEST INDEX WHERE COUNT > boxCountF[0]/cutOff_factor.
-
-
-
- COUNTS AT LESSER INDEXES WILL NOT BE USED IN THE ESTIMATE OF FRACTAL
-
- DIMENSION -- DISTORTION DUE TO SATURATION IS THE CONCERN.
-
-
-
- NOTE THAT boxCountF[0] IS THE NUMBER OF BOXES OF SIZE 1 (THE SMALLEST SIZE)
-
- THAT CONTAIN A POINT OF THE SET. FOR ALL PRACTICAL PURPOSES, boxCountF[0]
-
- WILL EQUAL THE NUMBER OF DISTINCT POINTS IN THE INPUT FILE, BECAUSE THESE
-
- BOXES ARE REALLY SMALL COMPARED TO THE SIZE OF THE BIGGEST BOX (ABOUT 4
-
- BILLION IF AN UNSIGNED LONG INT IS 32 BITS TO THE PLATFORM COMPILER. THE
-
- POINTS ARE SCALED BY THE PROGRAM SO THAT THE SET IS TOO "LARGE" TO FIT IN
-
- THE NEXT SMALLEST BOX SIZE, SO THAT "1" IS THE SMALLEST DIFFERENCE IN
-
- VALUE THAT CAN BE RESOLVED.) ONE BOX, IN EFFECT, COVERS ONLY A SINGLE POINT
-
- OF THE INPUT SET BECAUSE THE PROGRAM CAN'T RESOLVE POINTS WITH A SMALLER
-
- DIFFERENCE.
-
-
-
- WE THINK IT WOULD BE A BAD IDEA TO USE dataLines/cutOff_factor AS THE LIMIT
-
- BECAUSE IN CASES WHERE THERE WERE MANY DUPLICATE POINTS, WE WOULD SERIOUSLY
-
- OVER-ESTIMATE THE NUMBER OF DISTINCT POINTS, AND THUS USE SATURATED DATA TO
-
- BASE THE ESTIMATE OF FRACTAL DIMENSION UPON. WHEN TESTING THE PROGRAM WITH
-
- RANDOM DATA SAMPLED WITH REPLACEMENT, THIS COULD THROW THE RESULTS WAY OFF.
-
- (THIS HAPPENED TO US, AND IT TOOK US A WHILE TO FIGURE OUT WHY. AFTERWARDS,
-
- WE STOPPED USING dataLines/cutOff_factor, AND CHANGED TO
-
- boxCountF[0]/cutOff_factor.)
-
- */
-
- void findMark(ulong *markPtr, ulong boxCountM[])
-
- // ulong *markPtr, boxCountM[numbits+1] ;
-
- {
-
- int i, cutOff_factor=1;
-
-
-
- /* Calculate cutOff_factor = 2^(embed_dim) + 1 */
-
- for (i=1;i<=embed_dim;i++) cutOff_factor = cutOff_factor * 2;
-
- cutOff_factor++;
-
-
-
- *markPtr=0;
-
- for(i=0;i<numbits;i++)
-
- { if(boxCountM[i] > boxCountM[0]/cutOff_factor) *markPtr=i; }
-
- }
-
-
-
- /* ################################################################## */
-
- /* ################################################################## */
-
- void GetDims(double negLogBoxCountF[], double logSumSqrFreqF[],
-
- double informationF[], ulong markF,
-
- double *capDimPtr, double *infDimPtr, double *corrDimPtr)
-
- // ulong markF ;
-
- // double negLogBoxCountF[numbits+1], informationF[numbits+1],
-
- // logSumSqrFreqF[numbits+1],
-
- // *capDimPtr, *infDimPtr, *corrDimPtr;
-
- {
-
- int i;
-
- double logEps[numbits+1], slope, intercept;
-
-
-
- /* GET LOG (BASE 2) OF THE DIAMETER OF THE I'TH SIZE OF BOX. */
-
- for(i=numbits; i>=0; i--) logEps[i] = i;
-
-
-
- /* fitLSqrLine (markF, numbits-4, logEps, negLogBoxCountF, &slope,&intercept);*/
-
- fitLSqrLine (markF, numbits-2, logEps, negLogBoxCountF, &slope, &intercept);
-
- *capDimPtr = slope ;
-
- /*fitLSqrLine(markF, numbits-4, logEps, informationF, &slope, &intercept);*/
-
- fitLSqrLine(markF, numbits-2, logEps, informationF, &slope, &intercept);
-
- *infDimPtr = slope ;
-
- /*fitLSqrLine(markF,numbits-4, logEps, logSumSqrFreqF, &slope, &intercept);*/
-
- fitLSqrLine(markF,numbits-2, logEps, logSumSqrFreqF, &slope, &intercept);
-
- *corrDimPtr = slope ;
-
- }
-
- /* ################################################################## */
-
- /* ################################################################## */
-
-
-
- void main(int argc, char *argv[])
-
- // int argc;
-
- // char *argv[];
-
- {
-
- extern FILE *infile;
-
- extern int embed_dim, /* How many coordinates points have. */
-
- *marker; /* To mark queues during radix sort */
-
- extern ulong **data, /* initial array of data points */
-
- dataLines, /* number of data points in input file */
-
- *diff_test; /* XOR's of pairs of coordinates */
-
- extern long int *next_after, /* array for pointers to data. */
-
- avail; /* avail list pointer */
-
- extern QHeader Q[2]; /* array of two queues for the radix sort */
-
-
-
-
-
- int i,j,m;
-
- ulong maxdiam=0, /* 2^numbits - 1: determines the scaling. */
-
- boxCount[numbits+1], /* Array to keep track of the box
-
- counts of each size. Each index
-
- is the base-2 log of the
-
- corresponding box size. */
-
- pointCount[numbits+1], /* Array to keep track of how many points
-
- of the data set are contained in each
-
- box */
-
- mark; /* Marks smallest usable box count */
-
- double max, min, /* Maximum and minimum values in input file-- we
-
- need to know these in order to scale the
-
- input points. */
-
- buf, /* A buffer for inputs to stay in until they are
-
- scaled and converted to integers. */
-
- negLogBoxCount[numbits+1],
-
- logSumSqrFreq[numbits+1],
-
- information[numbits+1],
-
- capDim, infDim, corrDim;
-
- long int pointer; /* holds indexes used as pointers to records */
-
-
-
- printf("\n\n") ;
-
- printf("******************************************************************\n");
-
- printf(" FRACTAL DIMENSION REPORT -- by fd software (DiFalco/Sarraille)\n");
-
- printf("******************************************************************\n");
-
- printf("\n") ;
-
-
-
- /* IDENTIFY THE INPUT FILE */
-
- printf ("\nReporting on file named: %s.\n\n", argv[1]) ;
-
-
-
- /* FIND OUT HOW MANY COORDINATES POINTS HAVE -- 1?, 2?, 3?, MORE? */
-
- printf("Getting the embedding dimension ...\n");
-
- embed_dim = get_e_dim(argv[1]);
-
- printf("Embedding Dimension taken to be: %d\n\n",embed_dim);
-
-
-
- /*
-
- find max and min in input file. This function should open the input file
-
- for reading, scan it to get the maximum and minimum data values it
-
- contains, and then seek to the beginning of the input file so that the
-
- code below can proceed to read the data sequentially from the beginning.
-
- We are having trouble getting that to work, so we have temporarily taken
-
- the expedient of closing the file in the function max_min, and opening it
-
- again below.
-
- */
-
- printf("Finding max and min values in data ...");
-
- fflush(stdout);
-
- max_min (argv[1], &max, &min) ;
-
- printf ("\n\nMinimum value in input file is: %lf ...\n", min) ;
-
- printf ("Maximum value in input file is: %lf ...\n\n", max) ;
-
- /*
-
- set maxdiam=2^numbits-1 -- this is the largest value that an
-
- unsigned integer can have with the host compiler.
-
- (I checked the value obtained in this manner, and it
-
- is correct -- j.s.)
-
- */
-
- if (debugging || checking) printf("numbits is: %d\n",numbits);
-
- printf("%d different cell sizes will be used ...\n",numbits);
-
- for (m=0;m<numbits;m++) maxdiam = maxdiam + (1<<m);
-
- printf("Data will be shifted and re-scaled so that minimum coordinate\n");
-
- printf("value is ZERO and maximum coordinate value is: %lu ...\n\n",maxdiam);
-
-
-
- /* open input file */
-
- if((infile = fopen(argv[1],"r")) == NULL)
-
- {
-
- printf("Cannot open input file, %s\n",argv[1]);
-
- exit(-1);
-
- }
-
-
-
- /* get number of data points from input file */
-
- fscanf(infile,"%lu",&dataLines);
-
- if (debugging || checking) printf("dataLines is: %ld\n",dataLines);
-
-
-
- /*
-
- In the next few statements, we allocate "parallel arrays" which are
-
- being used to implement records containing fields for as many
-
- coordinates as the embedding dimension requires, for a "next_after"
-
- pointer field to be used to link the records into lists, and for a
-
- "marker" field to indicate whether the element is being used to mark the
-
- end of a pass in the radix sort.
-
- */
-
- /* allocate memory to point to a coordinate array for each coordinate --
-
- embedding dimension determines how many coordinates.
-
- */
-
-
-
- printf("Allocating storage ...");
-
- fflush(stdout);
-
-
-
- if ( (data = (ulong **) malloc (embed_dim*sizeof(ulong *))) == NULL)
-
- {
-
- printf("Memory allocation failure: \"data\".\n");
-
- exit(-1);
-
- }
-
-
-
- /* Here we allocate the storage for the coordinates of data points, plus
-
- extra room for 2 queue markers needed in the radix sort.
-
- */
-
- for (i=0;i<embed_dim;i++)
-
- if ((data[i] = (ulong *) malloc ((dataLines+2)*sizul)) == NULL)
-
- {
-
- printf("Memory allocation failure: \"data[%d]\").\n",i);
-
- exit(-1);
-
- }
-
-
-
- /* Here we allocate the storage for the pointers. These are implemented as
-
- long integers. The values of these pointers will be used as indexes into
-
- the arrays being declared here. Note that here we also allocate room for
-
- two markers.
-
- */
-
- if((next_after=(long *)malloc((dataLines+2)*sizeof(long))) == NULL)
-
- {
-
- printf("Memory allocation failure: \"next_after\" array).\n");
-
- exit(-1);
-
- }
-
-
-
- /* Here we allocate storage for a tag field that tells whether a record is a
-
- marker.
-
- */
-
-
-
- if ( (marker = (int *) malloc( (dataLines+2)*sizeof(int) ) ) == NULL)
-
- {
-
- printf("Memory allocation failure: \"marker\" array).\n");
-
- exit(-1);
-
- }
-
-
-
- /* Here we allocate storage for a word that describes the bit changes
-
- between two different unsigned long int's. We have one of these words
-
- for each of the embedding dimensions. These will come in handy when the
-
- sweep is done that gets the box counts.
-
- */
-
-
-
- if ( (diff_test = (ulong *) malloc ((embed_dim)*sizeof(ulong)) ) == NULL)
-
- {
-
- printf("Memory allocation failure: \"diff_test\" array).\n");
-
- exit(-1);
-
- }
-
-
-
- printf(" Done ...\n");
-
-
-
- /* Initialize the queues to an empty state. We load the data directly into
-
- these queues, and use them afterwards to do the radix sort and the sweep.
-
- */
-
- printf("Initializing queues ... ");
-
- fflush(stdout);
-
-
-
- create_Q(&Q[0]); create_Q(&Q[1]);
-
- if (debugging || checking) printf ("Just created queues.\n");
-
- if (debugging) print_Q(&Q[0]);
-
- if (debugging) print_Q(&Q[1]);
-
-
-
- /* Initialize the avail list to hold all the records implemented by the
-
- parallel arrays.
-
- */
-
- create_Av(&avail) ;
-
- if (debugging || checking) printf ("Just created avail.\n");
-
- if (debugging) print_Av(avail);
-
-
-
- /* Place a marker in each queue. Markers will be in front of each queue
-
- after all the data has been loaded.
-
- */
-
- for (i=0;i<2;i++)
-
- {
-
- pop_Av(&avail, &pointer);
-
- marker[pointer] = 1;
-
- en_Q(&(Q[i]),pointer);
-
- }
-
-
-
- printf("Done ...\n");
-
-
-
- /* Place all the data points in the appropriate queue. */
-
-
-
- printf ("Loading data ... ");
-
- fflush(stdout);
-
-
-
- for(i=0;i<dataLines;i++)
-
- {
-
- pop_Av(&avail, &pointer); /* get next record */
-
- marker[pointer] = 0; /* it is not a marker */
-
- for (j=0; j<embed_dim;j++)
-
- { fscanf(infile,"%lf",&buf); /* put scaled data in array */
-
- if (debugging) printf ("Just read %lf\n",buf);
-
- data[j][pointer]=(ulong)((buf-min)*(maxdiam/(max-min)));
-
- }
-
- /* Get started on the radix sort by putting the data in the
-
- queue corresponding to the least significant bit of the
-
- last coordinate. */
-
- if (IS_A_ONE(data[embed_dim-1][pointer],0))
-
- en_Q(&(Q[1]),pointer); /* last coord ends in "1" */
-
- else en_Q(&(Q[0]),pointer); /* last coord ends in "0" */
-
- }
-
-
-
- /* close the input file */
-
- fclose(infile);
-
- printf ("Done ...\n");
-
- if (debugging) print_Av(avail);
-
- if (debugging) print_Q(&Q[0]);
-
- if (debugging) print_Q(&Q[1]);
-
-
-
- /* radix sort queues */
-
-
-
- printf("Sorting the data ... ");
-
- fflush(stdout);
-
-
-
- radixsort();
-
-
-
- printf("Done ... \n");
-
-
-
- /* sweep data */
-
-
-
- printf("Doing sweep to get counts ... ");
-
- fflush(stdout);
-
-
-
- sweep(boxCount, negLogBoxCount, logSumSqrFreq, information);
-
-
-
- printf("Done ...\n\n");
-
-
-
- findMark(&mark, boxCount) ;
-
-
-
- /* GET RID OF THIS LINE WHEN DONE WITH TEST!!! */
-
- /* mark = 24; */
-
-
-
- /* WRITE COUNTS AND DERIVED DATA TO STANDARD OUTPUT. */
-
- printf("\n");
-
- printf("[log(epsl)]"); printf(" [CellCount]");
-
- printf(" [log(CellCount)]"); printf(" [informtn]");
-
- printf(" [-log(SumSqrFreqs)]\n");
-
-
-
- printf(" = [log(e)]"); printf(" = [N(e)] ");
-
- printf(" = [logN(e)] "); printf(" = [I(e)] ");
-
- printf(" = [-logSSF(e)]\n\n");
-
-
-
- for(i=0;i<=numbits;i++)
-
- {
-
- if ( (mark < numbits-2) && (i == mark) )
-
- { printf ("**************************************");
-
- printf ("**************************************\n"); }
-
- printf("%9d", i );
-
- printf("%13lu", boxCount[i]);
-
- printf("%18.5f", -negLogBoxCount[i]);
-
- printf("%12.5f", -information[i]);
-
- printf("%21.5f\n", -logSumSqrFreq[i]);
-
- if ( (mark < numbits-2) && (i == numbits-2) )
-
- { printf ("**************************************");
-
- printf ("**************************************\n"); }
-
- }
-
-
-
- printf ("\n\n\nTwo-Point Estimates of FD's:\n\n");
-
- printf("[log(e)]"); printf(" [logN(e)-logN(2e)]");
-
- printf(" [I(e)-I(2e)]"); printf(" [logSSF(2e)-logSSF(e)]\n\n");
-
-
-
- for(i=0; i<=numbits-1; i++)
-
- {
-
- if ( (mark < numbits-2) && (i == mark) )
-
- { printf ("**************************************");
-
- printf ("**************************************\n"); }
-
- printf ("%8d",i);
-
- printf ("%20.5f",negLogBoxCount[i+1]-negLogBoxCount[i]);
-
- printf ("%14.5f",information[i+1]-information[i]);
-
- printf ("%24.5f\n",logSumSqrFreq[i+1]-logSumSqrFreq[i]);
-
- if ( (mark < numbits-2) && (i==numbits-3) )
-
- { printf ("**************************************");
-
- printf ("**************************************\n"); }
-
- }
-
-
-
- if (mark >= numbits-2)
-
- {
-
- printf("\nInsufficient Data. More points are needed.\n") ;
-
- printf("Cannot assign a Fractal Dimension to this set.\n");
-
- printf("Examine the data above to make your own conjecture.\n");
-
- exit(-1) ;
-
- }
-
-
-
- printf ("\n\n\n%d is the smallest cell size used in ", mark);
-
- printf ("the overall dimension estimates\n");
-
- printf ("below. The largest cell size is ");
-
- printf ("%d. Data above corresponding to\n", numbits-2);
-
- printf ("this range is between rows of asterisks.\n\n");
-
-
-
- GetDims(negLogBoxCount, logSumSqrFreq, information, mark,
-
- &capDim, &infDim, &corrDim) ;
-
-
-
- printf("\n\nLeast-Square Estimates based on Indicated Cell Range:\n\n");
-
- printf("Fractal Dimension (Capacity) = %.5f\n", capDim );
-
- printf("Fractal Dimension (Information) = %.5f\n", infDim );
-
- printf("Fractal Dimension (Correlation) = %.5f\n", corrDim);
-
- printf("\n\n****************************************\n");
-
- }
-
-