//$$ fft.cxx                         Fast Fourier transform

// Copyright (C) 1991: R B Davies and DSIR


#define WANT_MATH

#include "include.hxx"

#include "newmatap.hxx"


static void cossin(int n, int d, real& c, real& s)
// Calculate c = cos(twopi*n/d) and s = sin(twopi*n/d), minimising roundoff
// error.
//
// The angle is written as (pi/2) * (sector + rem/d), where sector is 4n/d
// rounded to the nearest integer and rem = 4n - sector*d.  cos and sin are
// then evaluated only on the small residual angle (pi/2)*rem/d, and the
// quarter-turn identities in the switch below rebuild the requested values.
//
//   n, d  numerator and denominator of the fraction of a full turn;
//         d is used as a divisor, so it is assumed to be non-zero
//         (NOTE(review): the visible caller passes now*after - confirm
//         no other caller can pass 0)
//   c, s  outputs: cosine and sine of twopi*n/d
{
   // Widen to long BEFORE multiplying; "n * 4" evaluated in int could
   // overflow before the assignment to the long variable took place.
   long n4 = (long)n * 4;
   int sector = (int)floor( (real)n4 / (real)d + 0.5 );
   n4 -= (long)sector * d;              // remainder, again computed in long

   // Reduce sector to 0..3; the first form handles negative values, for
   // which % on its own would not give a result in that range.
   if (sector < 0) sector = 3 - (3 - sector) % 4; else sector %= 4;

   // residual angle: (pi/2) * rem / d
   real ratio = 1.5707963267948966192 * (real)n4 / (real)d;

   switch (sector)
   {
   case 0: c =  cos(ratio); s =  sin(ratio); break;   // angle = ratio
   case 1: c = -sin(ratio); s =  cos(ratio); break;   // angle = ratio + pi/2
   case 2: c = -cos(ratio); s = -sin(ratio); break;   // angle = ratio + pi
   case 3: c =  sin(ratio); s = -cos(ratio); break;   // angle = ratio + 3pi/2
   }
}

static void fftstep(ColumnVector& A, ColumnVector& B, ColumnVector& X,
   ColumnVector& Y, int after, int now, int before)
// One pass of the factored transform: reads the complex data held as
// (A = real part, B = imaginary part) and writes the result of this pass
// into (X = real part, Y = imaginary part).
//
//   now     the factor being processed in this pass
//   after   stride used to step through the input between output elements
//   before  number of output elements produced per (j, ia) pair
//
// For every output element the `now` input values, spaced gamma apart, are
// combined by nested multiplication with the rotation (cs, sn) supplied by
// cossin, starting from the highest-placed value and working downwards.
{
   const int gamma = after * before;      // spacing of the combined inputs
   const int delta = now * after;         // spacing of successive outputs
   real cs = 1.0;  real sn = 0.0;         // rotation; set by cossin below
   real* const out_re = X.Store();        // pointers to array storage
   real* const out_im = Y.Store();
   const int top = A.Nrows() - gamma;     // offset of the last input group

   for (int j = 0; j < now; ++j)
   {
      real* const in_re = A.Store();      // pointers to array storage
      real* const in_im = B.Store();

      for (int ia = 0; ia < after; ++ia)
      {
         // Sines and cosines are generated explicitly for each index
         // rather than by recurrence: slower, but more accurate.
         const int k = j * after + ia;
         cossin(-k, delta, cs, sn);

         real* src_re = in_re + ia + top;   // last group, element ia
         real* src_im = in_im + ia + top;
         real* dst_re = out_re + k;
         real* dst_im = out_im + k;

         if (now == 2)
         {
            // Radix 2: a single multiply-and-add, written out directly.
            for (int ib = before; ib != 0; --ib)
            {
               const real re = *src_re;  const real im = *src_im;
               *dst_re = re * cs - im * sn + *(src_re - gamma);
               *dst_im = re * sn + im * cs + *(src_im - gamma);
               src_re += after;  src_im += after;
               dst_re += delta;  dst_im += delta;
            }
         }
         else
         {
            // General radix: now-1 nested multiply-and-add steps.
            // It should be possible to make this faster by hand coding
            // now = 2,3,4,5,8 and by using symmetry to halve the number
            // of operations.
            for (int ib = before; ib != 0; --ib)
            {
               real* walk_re = src_re;  real* walk_im = src_im;
               real re = *walk_re;  real im = *walk_im;
               for (int step = now - 1; step != 0; --step)
               {
                  walk_re -= gamma;  walk_im -= gamma;
                  const real old_re = re;   // re is overwritten first
                  re = re     * cs - im * sn + *walk_re;
                  im = old_re * sn + im * cs + *walk_im;
               }
               *dst_re = re;  *dst_im = im;
               src_re += after;  src_im += after;
               dst_re += delta;  dst_im += delta;
            }
         }
      }
   }
}


void FFT(const ColumnVector& U, const ColumnVector& V,
   ColumnVector& X, ColumnVector& Y)
// Fast Fourier transform of a complex sequence of arbitrary length.
//
//   U, V  real and imaginary parts of the input; must have equal, non-zero
//         length (otherwise MatrixError is called)
//   X, Y  real and imaginary parts of the result; redimensioned to the
//         input length
//
// The length is split into factors by trial division (the primes up to 19,
// then successive odd numbers) and one fftstep pass is made per factor.
// The passes alternate between two pairs of work vectors, (A,B) and (X,Y).
// A length-1 input has no factors; it is returned unchanged.
{
   // from Carl de Boor (1980), Siam J Sci Stat Comput, 1 173-8
   const int n = U.Nrows();                     // length of arrays
   if (n != V.Nrows()) MatrixError("FFT - vector lengths unequal");
   if (n == 0) MatrixError("FFT - vector length zero");

   // work on copies so that the inputs are left untouched
#ifdef __ZTC__
   ColumnVector A = U.c(); ColumnVector B = V.c();
#else
   ColumnVector A = U; ColumnVector B = V;
#endif
   X.ReDimension(n); Y.ReDimension(n);

   // trial divisors: the first nextmx primes, then 21, 23, 25, ...
   const int nextmx = 8;
#ifndef ATandT
   int prime[8] = { 2,3,5,7,11,13,17,19 };
#else
   int prime[8];
   prime[0]=2; prime[1]=3; prime[2]=5; prime[3]=7;
   prime[4]=11; prime[5]=13; prime[6]=17; prime[7]=19;
#endif

   // after  = product of the factors already processed
   // before = part of n still to be factored
   // inzee  = TRUE while the current data is held in (A,B), FALSE while
   //          it is held in (X,Y)
   int after = 1; int before = n; int next = 0; BOOL inzee = TRUE;

   // Pre-tested loop: when n == 1 there is nothing to factor, and entering
   // the body would search without end for a divisor of 1.  Skipping it
   // leaves inzee TRUE, so the copy of the input is handed back below.
   while (before != 1)
   {
      // find the next factor `now` of what remains
      int now, b1;
      for (;;)
      {
         if (next < nextmx) now = prime[next];
         b1 = before / now;  if (b1 * now == before) break;
         next++; now += 2;          // past the table: keep trying odd numbers
      }
      before = b1;

      if (inzee) fftstep(A, B, X, Y, after, now, before);
      else fftstep(X, Y, A, B, after, now, before);

      inzee = !inzee; after *= now;
   }

   // result ended up in the work vectors: move it to the output arguments
   if (inzee) { A.Release(); X = A; B.Release(); Y = B; }
}


