1 /* FHT/FFT performance comparison 
  2    John Bryan 2017
  3    -lrt -lm -lfftw3 -O3 
  4    gcc 4.6.3
  5 */
  6 
  7 #include <stdlib.h>
  8 #include <stdio.h>
  9 #include <time.h>
 10 #include <fftw3.h>
 11 #include <complex.h>
 12 #include <math.h>
 13 #include <stdint.h>
 14 #include <assert.h>
 15 #include <float.h>
 16 
 17 
 18 
 19 #define BILLION 1000000000L
 20 // rows is the number of lengths
 21 #define rows 10   
 22 // columns is the number of iterations averaged
 23 #define columns 1000 
 24 
 25 
 26 double randf(void)
 27 {
 28     //return random value between -1 and 1
 29     double low=-1.0, high=1.0;
 30     return (rand()/(double)(RAND_MAX))*abs(low-high)+low;
 31 }
 32 
 33 
 34 double dAvg(int array[], size_t length){
 35     // compute average
 36     size_t c;
 37     double sum = 0;
 38     for (c = 0; c < length; c++){ sum += array[c]; }
 39     return sum / (double) length;
 40 }
 41 
 42 
 43 int n_close(long double A, long double B, long double maxDiff, long double maxRelDiff)
 44 {
 45        float diff = fabs(A - B);
 46        if (diff <= maxDiff)
 47            {return 0;}
 48        A = fabs(A);
 49        B = fabs(B);
 50        long double largest = (long double)(B > A) ? B : A;
 51        if (diff <= largest * maxRelDiff)
 52            {return 0;}
 53        assert(0);
 54 }
 55 
 56 
 57 void swap (complex long double *x, int i, int j)
 58 {
 59     // used in bitreversal 
 60     complex long double temp;
 61     temp=x[i];
 62     x[i]=x[j];
 63     x[j]=temp;
 64 }
 65 
 66 
 67 void swap2 (long double *x, int i, int j)
 68 {
 69     // used in bitreversal2
 70     long double temp;
 71     temp=x[i];
 72     x[i]=x[j];
 73     x[j]=temp;
 74 }
 75 
 76 
 77 complex long double * bitreversal(complex long double *x, int N)
 78 {
 79      // used just before dit fft
 80      int M=1,T,k,n,h,r[N];
 81      for (h=0;h<N;h++)
 82          r[h]=0;
 83      while (M<N)
 84      {
 85          k=0;
 86          while (k<M)
 87          {
 88              T= (int)(2*r[k]);
 89              r[k]=T;
 90              r[k+M]=T+1;
 91              k=k+1;
 92           }
 93           M = (int)(2*M);
 94       }
 95       n=1;
 96       while (n<(N-1)) {
 97           if (r[n]>n) swap(x,n,r[n]);
 98           n=n+1; }
 99       return x;
100 }
101 
102 
103 long double * bitreversal2(long double *x, int N)
104 {
105      // used just before fht 
106      int M=1,T,k,n,h,r[N];
107      for (h=0;h<N;h++)
108          r[h]=0;
109      while (M<N)
110      {
111          k=0;
112          while (k<M)
113          {
114              T= (int)(2*r[k]);
115              r[k]=T;
116              r[k+M]=T+1;
117              k=k+1;
118           }
119           M = (int)(2*M);
120       }
121       n=1;
122       while (n<(N-1)) {
123           if (r[n]>n) swap2(x,n,r[n]);
124           n=n+1; }
125       return x;
126 }
127 
128 complex long double * fft(int N, complex long double *x2, complex long double *t,int p)
129 {
130     // DIT fft
131     int Np=2,Npp=1,BaseT=0,BaseB=0,n=0,tf=0,Bp=N/2,tss=N/2,P=0,b=0;
132     complex long double bot,top;
133     for (P=0; P<p; P++)
134     {
135        Npp=Np/2;
136        BaseT=0;
137        for (b=0; b<Bp; b++)
138        {
139            BaseB=BaseT+Npp;
140            for (n=0;n<Npp;n++)
141            {
142                if (n==0)
143                   bot=x2[BaseB+n];
144                else
145                {
146                   tf=n*tss;
147                   bot=x2[BaseB+n]*t[tf];
148                }
149                top=x2[BaseT+n];
150                x2[BaseT+n]=top+bot;
151                x2[BaseB+n]=top-bot;
152             }
153             BaseT=BaseT+Np;
154        }
155        Bp=Bp/2;
156        Np=Np*2;
157        tss=tss/2;
158     }
159     return x2;
160 }
161 
162 
163 long double *  fht(long double *x, long double *xo, long double *c, long double *s,int N,int p)
164 {
165     // fast hartley transform 
166     int Np=2,Npp=1,baseT=0,baseB=0,baseBB=0,Nmn=0,n=0,tf=0,Bp=N/2,tss=N/2,P=0,b=0;
167     long double xcs;
168     for (P=0; P<p; P++)
169     {
170        Npp=Np/2;
171        baseT=0;
172        for (b=0; b<Bp; b++)
173        {
174            baseB=baseT+Npp;
175            baseBB=baseT+Np;
176            for (n=0;n<Npp;n++)
177            {
178                tf=n*tss;
179                Nmn=(baseBB-n)%baseBB;
180                if (P%2==0)
181                {
182                    if (n==0)
183                        xcs=x[baseB+n];
184                    else
185                        xcs=(x[baseB+n]*c[tf])+(x[Nmn]*s[tf]);
186                    xo[baseT+n]=x[baseT+n]+xcs;
187                    xo[baseB+n]=x[baseT+n]-xcs;
188                }
189                else
190                {
191                    if (n==0)
192                        xcs=xo[baseB+n];
193                    else
194                         xcs=(xo[baseB+n]*c[tf])+(xo[Nmn]*s[tf]);
195                    x[baseT+n]=xo[baseT+n]+xcs;
196                    x[baseB+n]=xo[baseT+n]-xcs;
197                 }
198            }
199            baseT=baseT+Np;
200        }
201        Bp=Bp/2;
202        Np=Np*2;
203        tss=tss/2;
204     }
205     if (p%2==0)
206         return x;
207     else
208         return xo;
209 }
210 
211 
212 int main()
213 {
214    int u,i,N,cc=0,z=0;
215    int uN=rows;
216    int it=columns;
217    double mean3[uN],mean4[uN];
218    float PI=acos(-1);
219    complex long double *x2,*t;
220    long double *x,*xo,*x3,*c,*s,*check;
221    int ts, times3[uN][it], times4[uN][it];
222    struct timespec start,end;
223    FILE *f=fopen("file.txt","w");
224    fftw_complex  *in,*out;
225    fftw_plan p;
226    for (u=0;u<uN;u++)
227    {
228       for (cc=0;cc<it;cc++)
229       {
230           N=(int)powf(2.0,(float)(u+4));
231           c=malloc(sizeof(long double)*N/2);
232           s=malloc(sizeof(long double)*N/2);
233           t=malloc(sizeof(complex long double)*N/2);
234           for ( i = 0; i < N/2; i++ ) t[i]=cexp(-2*PI*I*i/N);
235           for ( i = 0; i < N/2; i++ ) c[i]=cos(2*PI*i/N);
236           for ( i = 0; i < N/2; i++ ) s[i]=sin(2*PI*i/N);
237           x=malloc(sizeof(long double)*N);
238           xo=malloc(sizeof(long double)*N);
239           x3=malloc(sizeof(long double)*N);
240           check=malloc(sizeof(long double)*N);
241           x2=malloc(sizeof(complex long double)*N);
242           for ( i = 0; i < N; i++ ) x[i]=randf();
243           for ( i = 0; i < N; i++ )x2[i]=x[i] +(0.*I);
244           in=fftw_malloc(sizeof(fftw_complex)*N);
245           out=fftw_malloc(sizeof(fftw_complex)*N );
246           for ( i = 0; i < N; i++ )
247           {
248               in[i][0]=creal(x[i]);
249               in[i][1]=cimag(x[i]);
250           }
251           x2=bitreversal(x2,N);
252           clock_gettime(CLOCK_MONOTONIC,&start);
253           x2=fft(N,x2,t,u+4);
254           clock_gettime(CLOCK_MONOTONIC,&end);
255           ts = BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec;
256           times4[u][cc]=ts;
257           p = fftw_plan_dft_1d ( N, in, out, FFTW_FORWARD, FFTW_ESTIMATE );
258           fftw_execute(p);
259           fftw_destroy_plan(p);
260           fftw_free(in);
261           x=bitreversal2(x,N);
262           clock_gettime(CLOCK_MONOTONIC,&start);
263           x3=fht(x,xo,c,s,N,u+4);
264           clock_gettime(CLOCK_MONOTONIC,&end);
265           ts = BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec;
266           times3[u][cc]=ts;
267            for (i=0;i<N;i++)
268            {
269               x3[i]=x3[i]/N;
270               x2[i]=x2[i]/N;
271            }
272            for (i=0;i<N;i++)
273            {
274               out[i][0]=out[i][0]/N;
275               out[i][1]=out[i][1]/N;
276               check[i]=out[i][0]-out[i][1];
277            }
278            for (i=0;i<N;i++)
279            {
280                n_close(x3[i],check[i],0.01,DBL_EPSILON);
281                n_close(creal(x2[i]),out[i][0],0.01,DBL_EPSILON);
282                n_close(cimag(x2[i]),out[i][1],0.01,DBL_EPSILON);
283            }
284       }
285    }
286    printf("\n\nfht and fft are correct\n\n");
287    fftw_free(out);
288    free(x);
289    free(x2);
290    free(xo);
291    free(t);
292    free(c);
293    free(s);
294    for (z=0; z<rows; z++)
295    {
296         mean3[z]=dAvg(times3[z], columns);
297         mean4[z]=dAvg(times4[z], columns);
298    }
299    //loop fft time results
300    for ( i = 0; i < uN; i++ )
301       fprintf (f, "  %3d   %llu \n",(int)powf(2.0,(float)(i+4)) , (long long unsigned int)mean4[i] );
302    //fht time results
303    for ( i = 0; i < uN; i++ )
304       fprintf (f, "  %3d   %llu \n",(int)powf(2.0,(float)(i+4)) , (long long unsigned int)mean3[i] );
305    fclose(f);
306    system("python plot_fht.py");
307    exit(0);
308 }