home *** CD-ROM | disk | FTP | other *** search
- /***************************************************************************
- * Copyright (C) 1994 Charles P. Peterson *
- * 4007 Enchanted Sun, San Antonio, Texas 78244-1254 *
- * Email: Charles_P_Peterson@fcircus.sat.tx.us *
- * *
- * This is free software with NO WARRANTY. *
- * See gfft.c, or run program itself, for details. *
- * Support is available for a fee. *
- ***************************************************************************
- *
- * Program: gfft--General FFT analysis
- * File: cfft.c
- * Purpose: fast Fourier analysis function with complex i/o array
- * Author: Charles Peterson (CPP)
- * History: 4-June-1993 CPP; Created.
- * 9-March-1994 CPP; added SAVE_TRIG_OPT sections.
- * 6-Feb-95 CPP (1.31); Progress Requester
- * Comment:
- * Inspired by the algorithm expressed in four1.c, page 411-412 in
- * Numerical Recipes in C (William H. Press, Brian P. Flannery, Saul A.
- * Teukolsky, William T. Vetterling. Cambridge University Press, 1988.)
- * In turn, inspiration for their work came from N. Brenner of Lincoln
- * Laboratories, Danielson, Lanczos, J. W. Cooley, J. W. Tukey, and many
- * others, including Fourier, for whom the underlying analysis is named.
- * This, however, is an original implementation without any copied code.
- * I hope it is much easier to read and understand than its predecessors.
- * If nothing else, it is very verbose, and doesn't require an intuitive
- * understanding of all trigonometric identities. Things got a little
- * worse when I added an option to save the previously used trig values,
- * but most people would consider that (in principle) worthwhile.
- *
- * Single precision floating point, except for trigonometric values
- * which are computed in double precision.
- */
-
- #define PROGRESS_INDICATOR
- #define SAVE_TRIG_OPT
- /*
- * Comment the above lines out for easiest use outside GFFT.
- *
- * You will lose the optional saving of trig values, but (see note below)
- * that really doesn't make a big difference in performance.
- *
- * Otherwise, when using this code outside GFFT you will have to duplicate
- * the GFFT memory allocation function, or modify this file accordingly
- * (e.g. with an error return if the memory demanded cannot be allocated).
- *
- * Note: When SAVE_TRIG_OPT is defined, as in GFFT, there is a BOOLEAN
- * global variable SaveMemory which disables the saving of trig values
- * when set.
- */
-
- #include <math.h>
-
- #ifdef _AMIGA
- #ifdef _M68881
- #include <m68881.h>
- #endif
- #endif
-
- #include "complex.h"
-
- #ifdef SAVE_TRIG_OPT
- #include "gfft.h" /* gmalloc safe allocation (longjmps on error) */
- #include "settings.h" /* SaveMemory option */
- #endif
-
- #ifdef PROGRESS_INDICATOR
- #include "wbench.h"
- #define PROGRESS_INCREMENT 1024
- long Inner_Loop_Count = 0;
- extern double Percent_Per_Loop;
- extern double Initial_Progress;
- static long next_progress_count = 0;
- #endif
-
- void cfft (Complex_float datac[], long n, int isign)
- {
- /* ARGUMENTS:
- *
- * datac input/output Complex_float data array
- * transformed according to isign (see below)
- *
- * n number of complex data elements
- * *** n MUST BE A POWER OF 2 ***
- * *** THIS IS NOT CHECKED FOR ***
- *
- * isign if 1, datac is replaced with its discrete Fourier transform
- * if -1, datac is replaced with n times its INVERSE
- * discrete Fourier transform
- */
-
- #ifdef SAVE_TRIG_OPT
- static Complex_float *wkvector = NULL;
- static last_n = 0;
- #endif
-
- #ifdef PROGRESS_INDICATOR
- next_progress_count = 0;
- #endif
-
-
- /*
- * This function is divided into 2 or 3 main blocks to minimize the scope
- * of variables involved and hopefully to encourage better register use.
- */
-
- /* In the first block we sort the input array of complex numbers into
- * bit reversal order (e.g. the value at binary index ...001 is swapped
- * with the value at binary index 100...)
- *
- * While i counts forwards from 1, j counts "inversely" from binary 100...
- *
- * Skip i==0 and i==n-1 (They never get swapped anyway)
- */
-
- {
- long const halfn = n / 2;
- long const nminus1 = n - 1;
- long register j = halfn;
- long register i = 1;
-
- for (; i < nminus1; i++)
- {
- if (j > i) /* If move required, and not done before */
- {
- CF_SWAP (datac[i], datac[j]);
- }
- /*
- * The following sub-block is for inverse counting.
- * To count inversely, we clear highest set bits until reaching
- * the first clear bit, then set it.
- */
- {
- long register scanbit = halfn;
- while (j >= scanbit && scanbit > 0)
- {
- j -= scanbit;
- scanbit >>= 1;
- }
- j += scanbit;
-
- } /* End inverse counting sub-block */
-
- } /* End for (i = 1, 2, ..., n-1) */
-
- } /* End sorting block */
- /*
- * The following block(s) are the Danielson-Lanczos section of the routine.
- * Here we combine the transforms, initially of one value each, by two
- * at a time giving us transforms twice as large, until everything has
- * been combined into one big transform.
- *
- * Thanks to clever sorting, each transform of 'even' elements is
- * followed by a transform of 'odd' elements during each combination.
- *
- * mathematical summary (in which = denotes equality or definition):
- * (A transform of length one simply copies input to output)
- * F[k] = F[k][even] + (w ** k) * F[k][odd]
- * (w ** k) = - (w ** (k + N/2))
- * w = e ** (2 * PI * i / N)
- * e ** (i * theta) = cos (theta) + i * sin (theta)
- * i = sqr (-1)
- * e and PI are the named transcendental numbers
- * cos and sin are the named trigonometric functions
- * N is the number of complex samples; k is a number 0..N-1
- */
-
- #ifdef SAVE_TRIG_OPT
- if (SaveMemory)
- #endif
-
- /*
- * This is the unaccelerated version
- * The trig constants are computed as they are needed
- * each time cfft is called.
- */
-
- {
- long osize; /* Size of old transforms being combined */
- long nsize; /* Size of new transforms being produced */
- Complex_double w;
- Complex_double wtemp;
- Complex_double wk; /* (w ** k) */
- Complex_float wk_times_f_k_odd;
- Complex_float wkfloat; /* Floating copy for innermost loop */
-
- double theta;
- long ieven, iodd, k;
-
- for (osize = 1,nsize = 2; osize < n; osize = nsize,nsize *= 2)
- {
- theta = 2.0 * PI / (isign * nsize); /* PI is from math.h */
- w.real = cos (theta);
- w.imaginary = sin (theta);
- wk.real = 1.0;
- wk.imaginary = 0.0;
- wkfloat.real = 1.0;
- wkfloat.imaginary = 0.0;
-
- for (k = 0; k < osize; k++)
- {
- for (ieven = k; ieven < n; ieven += nsize)
- {
- iodd = ieven + osize;
- /*
- * Note that the the upper and lower parts of each new
- * transform being produced use the same components--
- * taken through a 'second cycle' since they are periodic
- * in their original N (nsize/2). Only W**k differs,
- * and, fortuitously, W**k = -W**(k+nsize/2). Also
- * fortuitously, the input slots for F_k_even and F_k_odd
- * are separated by nsize/2, as are the output slots for
- * F_k and F_(k+nsize/2), so we can use and write over the
- * same slots during each pass in the inner loop, using the
- * same value of W**k (we subtract instead of negating).
- */
- C_MULT (wkfloat, datac[iodd], wk_times_f_k_odd);
- C_SUB (datac[ieven], wk_times_f_k_odd, datac[iodd]);
- C_ADD (datac[ieven], wk_times_f_k_odd, datac[ieven]);
-
- } /* end for (sums using same wk factor) */
-
- /* C_MULT (wk, w, wk) but must use temp or i/o coincide */
-
- C_MULT (wk, w, wtemp);
- wkfloat.real = (float) (wk.real = wtemp.real);
- wkfloat.imaginary = (float) (wk.imaginary =
- wtemp.imaginary);
-
- } /* end for (k = 1, 2, ..., osize) */
-
- } /* end for (osize, nsize...) */
-
- } /* end of Danielson-Lanczos section (SaveMemory slow version) */
-
- #ifdef SAVE_TRIG_OPT
- else /* if (!SaveMemory) */
- {
-
- /* To speed things up under most cases (where the same N is used for
- * more than one transform, we are going to first create the vector of
- * trigonometrically derived wk constants. If N is unchanged, and the
- * the vector has already been created, we simply re-use the previous
- * vector. If it weren't for this section (which uses GFFT allocation
- * routines) there would be no memory allocated in cfft, and no need for
- * the gfft.h header file). Note that this also increases the dynamic
- * memory requirements of gfft significantly because the vector necessary
- * must be large enough for 2*n complex floating point numbers, making it
- * one of the biggest single dynamic memory uses--particularly for large N
- * which some people might be interested in. So, I have decided
- * to make this feature optional, if included at all. As currently
- * implemented, it also requires the 'safe' gmalloc, which longjmps
- * on error (so as not to confuse the interface here and elsewhere).
- *
- * 11-March-94 CPP; I'm somewhat disappointed with the 5-7% overall
- * performance improvement attributable to this modification, in spite of
- * the fact that lstat shows cfft as using 67% (58% accelerated) of the
- * total time for a 30 sec batch spectrum analysis (run on a vanilla 68000
- * with no FFP even). But, I should have anticipated this minor change;
- * the trigs become a relatively small part of the overall calculations.
- * Don't let anyone tell you any different. (Meanwhile, the dreaded
- * ok_spectrum and ok_read, which look so horribly inefficient, were only
- * taking around 2% of the total time each.) The number one line is the
- * complex subtraction following the main fft complex multiplication; it
- * probably waits there for the complex multiplication to finish. That,
- * not surprisingly, it the chief bottleneck. As it should be.
- *
- */
-
- if (n > 1 && (!wkvector || n != last_n))
- {
- long osize; /* Size of old transforms being combined */
- long nsize; /* Size of new transforms being produced */
- Complex_double w;
- Complex_double wk; /* (w ** k) */
- Complex_double wtemp;
-
- double theta;
- long k;
- long vsize; /* vector size */
- long ntemp;
- long wki = 0;
- /*
- * The vector size is N + log2(N) - 1 where log2 is 'log base 2'
- * we calculate this using integer arithmetic
- */
- vsize = -1;
- for (ntemp = n; ntemp > 1; ntemp /= 2) vsize++; /* log base 2 */
- vsize += n;
-
- if (wkvector)
- {
- gfree (wkvector);
- }
- wkvector = gmalloc (vsize * sizeof(Complex_float),
- NOTHING_SPECIAL);
-
- for (osize = 1,nsize = 2; osize < n; osize = nsize,nsize *= 2)
- {
- theta = 2.0 * PI / (isign * nsize); /* PI is from math.h */
- w.real = cos (theta);
- w.imaginary = sin (theta);
-
- wk.real = 1.0;
- wk.imaginary = 0.0;
-
- wkvector[wki].real = 1.0;
- wkvector[wki++].imaginary = 0.0;
-
- for (k = 0; k < osize; k++)
- {
- /* C_MULT (wk, w, wk) but must use temp or i/o coincide */
-
- C_MULT (wk, w, wtemp);
- wkvector[wki].real = (float) (wk.real = wtemp.real);
- wkvector[wki++].imaginary = (float) (wk.imaginary =
- wtemp.imaginary);
-
- } /* end for (k = 1, 2, ..., osize) */
-
- } /* end for (osize, nsize...) */
-
- last_n = n;
-
- } /* end of trig constants computation block */
-
- /*
- * Now, we compute this fft
- */
- {
- long osize; /* Size of old transforms being combined */
- long nsize; /* Size of new transforms being produced */
- Complex_float wk_times_f_k_odd;
-
- long ieven, iodd, k;
- register Complex_float *wkp = wkvector;
-
- for (osize = 1,nsize = 2; osize < n; osize = nsize,nsize *= 2)
- {
- for (k = 0; k < osize; k++)
- {
- for (ieven = k; ieven < n; ieven += nsize)
- {
- iodd = ieven + osize;
- /*
- * Note that the the upper and lower parts of each new
- * transform being produced use the same components--
- * taken through a 'second cycle' since they are periodic
- * in their original N (nsize/2). Only W**k differs,
- * and, fortuitously, W**k = -W**(k+nsize/2). Also
- * fortuitously, the input slots for F_k_even and F_k_odd
- * are separated by nsize/2, as are the output slots for
- * F_k and F_(k+nsize/2), so we can use and write over the
- * same slots during each pass in the inner loop, using the
- * same value of W**k (we subtract instead of negating).
- */
- C_MULT (*wkp, datac[iodd], wk_times_f_k_odd);
- C_SUB (datac[ieven],wk_times_f_k_odd, datac[iodd]);
- C_ADD (datac[ieven],wk_times_f_k_odd, datac[ieven]);
-
- #ifdef PROGRESS_INDICATOR
- if (++Inner_Loop_Count >= next_progress_count)
- {
- next_progress_count += PROGRESS_INCREMENT;
- progress_requester__update
- ((int) (Initial_Progress + (Inner_Loop_Count *
- Percent_Per_Loop)));
- }
- #endif
- } /* end for (sums using same wk factor) */
-
- wkp++;
-
- } /* end for (k = 1, 2, ..., osize) */
-
- wkp++;
-
- } /* end for (osize, nsize...) */
-
- } /* end of transform computation */
-
- } /* end of !SaveMemory (fast memory hog) version */
-
- #endif /* whether SAVE_TRIG_OPT defined or not */
-
- } /* end of cfft */
-