/*
     complex.c
*/
/*
** This file is placed into the public domain by its author,
** Carey Bloodworth (Carey@Bloodworth.org) on July 16, 2001
**
** This multiplication demo is not designed for high performance.
** It's a tutorial program designed to be used with the information
** on my web site at www.Bloodworth.org
*/
/*
** This file demonstrates a very basic FFT mul using a plain
** ordinary complex FFT.  Inefficient.
**
** To compile this using GCC:
** gcc main.c complex.c -o complex.exe
*/
#include 
#include 
#include 
#include 
#include 

typedef short int Short;

typedef struct {double r,i;} Cmplx;

#define CmplxAdd(_S,_A,_B) {_S.r=_A.r+_B.r;_S.i=_A.i+_B.i;}
#define CmplxConj(_V) {_V.i=-_V.i;}
#define CmplxConj2(_A,_B) {_A.r=_B.r;_A.i=-_B.i;}
#define CmplxDivV(_A,_v) {_A.r/=(_v);_A.i/=(_v);}
#define CmplxImag(_A) _A.i
#define CmplxMul(_P,_A,_B)         \
  {Cmplx _R;                       \
   _R.r=(_A.r*_B.r) - (_A.i*_B.i); \
   _R.i=(_A.i*_B.r) + (_A.r*_B.i); \
   _P=_R;                          \
  }
#define CmplxMulV(_A,_v) {_A.r*=(_v);_A.i*=(_v);}
#define CmplxReal(_A) _A.r
#define CmplxSet(_S,_A,_B) {_S.r=_A;_S.i=_B;}
#define CmplxSub(D,_A,_B) {D.r=_A.r-_B.r;D.i=_A.i-_B.i;}


#define CalcFFTLen(_NumLen) ((((_NumLen)*BASE_DIG)*2)/BASE_DIG)
/* NumLen*BaseDig*ZeroPadding/Dig_Per_FFT */

extern double MaxFFTError;
static Cmplx *FFTNum1=NULL, *FFTNum2=NULL;
static double BASE;
static int BASE_DIG;

/* A Konstant.  Compiler might not have this. */
double K_PI_    =3.14159265358979323846L;

/*
** I like to do the trig stuff as macros.
** It lets me seperate that stuff from the regular FFT stuff.
** It also lets me play with different ways to compute the trig.
*/
#if 1
#define TRIG_VARS  Cmplx PRoot,Root;

#define INIT_TRIG(LENGTH,DIR)       \
 PRoot.r=1.0;PRoot.i=0.0;           \
 Root.r=sin(K_PI_/((LENGTH)*2));    \
 Root.r=-2.0*Root.r*Root.r;         \
 Root.i=sin(K_PI_/(LENGTH))*(DIR);

#define NEXT_TRIG_POW         \
 {Cmplx Temp;                 \
  Temp=PRoot;                 \
  CmplxMul(PRoot,PRoot,Root); \
  CmplxAdd(PRoot,PRoot,Temp); \
 }
#endif

#if 0
#define TRIG_VARS                   \
 size_t TLen,TNdx;int TDir;         \
 Cmplx PRoot,Root;

#define INIT_TRIG(LENGTH,DIR)       \
 TNdx=0;TLen=LENGTH;TDir=(DIR);     \
 PRoot.r=1.0;PRoot.i=0.0;           \
 Root.r=sin(K_PI_/((LENGTH)*2.0));  \
 Root.r=-2.0*Root.r*Root.r;         \
 Root.i=sin(K_PI_/(LENGTH))*(DIR);

#define NEXT_TRIG_POW               \
 if (((++TNdx)&7)==0)               \
   {double Angle;                   \
    Angle=(K_PI_*(TNdx))/TLen;      \
    PRoot.r=sin(Angle*0.5);         \
    PRoot.r=1.0-2.0*PRoot.r*PRoot.r;\
    PRoot.i=sin(Angle)*(TDir);      \
   }                                \
 else                               \
   {Cmplx Temp;                     \
    Temp=PRoot;                     \
    CmplxMul(PRoot,PRoot,Root);     \
    CmplxAdd(PRoot,PRoot,Temp);     \
   }
#endif

#if 0
#define TRIG_VARS                   \
 double Angle;                      \
 size_t TLen,TNdx;int TDir;         \
 Cmplx PRoot;

#define INIT_TRIG(LENGTH,DIR)       \

#define NEXT_TRIG_POW               \
 Angle=(K_PI_*(++TNdx))/TLen;       \
 PRoot.r=sin(Angle*0.5);            \
 PRoot.r=1.0-2.0*PRoot.r*PRoot.r;   \
 PRoot.i=sin(Angle)*(TDir);
#endif


int Log2(int Num)
{int x=-1;
if (Num==0) return 0;
while (Num) {x++;Num/=2;}
return x;
}

static int
IsPow2(int Num)
{
  return ((Num & -Num) == Num);
}


#if 0
/*
** Reorder complex data array by bit reversal rule.
*/
static void
FFTReOrder(Cmplx *Data, int Len)
{int Index,xednI,k;

xednI = 0;
for (Index = 0;Index < Len;Index++)
  {
   if (xednI > Index)
     {Cmplx Temp;
      Temp=Data[xednI];
      Data[xednI]=Data[Index];
      Data[Index]=Temp;
     }
   k=Len/2;
   while ((k <= xednI) && (k >=1)) {xednI-=k;k/=2;}
   xednI+=k;
  }
}
#endif

/*
** Example of using both DiF and DiT style transforms
** without any scrambling.
*/

static void
FFT_T(Cmplx *Data, int Len,int Dir)
/* Generic Decimation in Time */
{
  int Step, HalfStep;
  int b;
  TRIG_VARS;

/*  FFTReOrder(Data, Len);*/

  Step = 1;
  while (Step < Len)
    {
      Step *= 2;
      HalfStep = Step/2;

      INIT_TRIG(HalfStep,Dir);

      for (b = 0; b < HalfStep; b++)
        {int L,R;
         for (L=b;L < Len; L+=Step)
           {Cmplx TRight,TLeft;
            R=L+HalfStep;
            TLeft=Data[L];TRight=Data[R];
            CmplxMul(TRight,TRight,PRoot);
            CmplxAdd(Data[L],TLeft,TRight);
            CmplxSub(Data[R],TLeft,TRight);
           }
         NEXT_TRIG_POW;
        }
    }
}

static void
FFT_F(Cmplx *Data, int Len,int Dir)
/* Generic Decimation in Frequency */
{
  int Step, HalfStep;
  int b;
  TRIG_VARS;

  Step = Len;
  while (Step > 1)
    {
      HalfStep = Step/2;

      INIT_TRIG(HalfStep,Dir);

      for (b = 0; b < HalfStep; b++)
        {int L,R;
         for (L=b;L < Len; L+=Step)
           {Cmplx TRight,TLeft;
            R=L+HalfStep;
            TLeft=Data[L];TRight=Data[R];
            CmplxAdd(Data[L],TLeft,TRight);
            CmplxSub(Data[R],TLeft,TRight);
            CmplxMul(Data[R],Data[R],PRoot);
           }
         NEXT_TRIG_POW;
        }
     Step /= 2;
    }
/*  FFTReOrder(Data, Len);*/
}

void
FFTMul(Short * Prod, Short * Num1, Short * Num2,int Len)
/*
** Do a plain FFT based multiply.
** Data is big-endian
*/
{
  int x, FFTLen = CalcFFTLen(Len);
  double Carry, RawPyramid, Pyramid, PyramidError;
  double inv;

/*
** This is a radix-2 FFT, so the length has to be a power of two.
*/
  if (!IsPow2(Len)) {printf("FFT length is not a power of two.\n");exit(0);}

  for (x = 0; x < FFTLen; x++) CmplxSet(FFTNum1[x],0,0);
  for (x = 0; x < Len; x++)    CmplxSet(FFTNum1[x],Num1[Len - x - 1],0);
/* Put our data into FFT in little endian format & zero pad */

  FFT_F(FFTNum1, FFTLen, 1);

/*
** If we are squaring a number, we can save the cost
** of a FFT.
*/
  if (Num1 == Num2)
    {
      /*
      ** Now do the convolution
      */
      for (x = 0; x < FFTLen; x++)
         CmplxMul(FFTNum1[x],FFTNum1[x],FFTNum1[x]);
    }
  else
    {
      for (x = 0; x < FFTLen; x++) CmplxSet(FFTNum2[x],0,0);
      for (x = 0; x < Len; x++)    CmplxSet(FFTNum2[x],Num2[Len - x - 1],0);
      FFT_F(FFTNum2, FFTLen, 1);

      /*
      ** Now do the convolution
      */
      for (x = 0; x < FFTLen; x++)
         CmplxMul(FFTNum1[x],FFTNum1[x],FFTNum2[x]);
    }

/*
** Now do an Inverse FFT
*/
  FFT_T(FFTNum1, FFTLen, -1);

/*
** Now round the results, and release our carries, and store
** the results in the 'prod' array.  Also do the normalization
** we didn't do in the FFT.  And, as usual, it's slightly faster
** to multiply by the reciprocal, instead of doing the division.
*/
  Carry = 0.0;
  MaxFFTError=0.0;
  inv = 1.0 / FFTLen;
  for (x = FFTLen; x > 0; x--)
    {
      RawPyramid = FFTNum1[FFTLen - x].r * inv + Carry;
      Pyramid = floor(RawPyramid + 0.5);
/* floor() is a very slow function on the x86/x87 */
      PyramidError = fabs(RawPyramid - Pyramid);
      if (PyramidError > MaxFFTError)
        {
          MaxFFTError = PyramidError;
/*        printf("New Max FFT Error=%f\n",MaxFFTError);*/
        }
      Carry = floor(Pyramid / BASE);
      Prod[x - 1] = Pyramid - Carry * BASE;
    }
}

void
InitFFT(unsigned long int Len,int Base,int BaseDig)
{int Bytes;

BASE=Base;
BASE_DIG=BaseDig;

Bytes=sizeof(Cmplx)*CalcFFTLen(Len);

if (BaseDig > 4)
  {
   printf("Error:  The fft is slightly hardwired for <= 4 digits in the base.\n");
   exit(0);
  }

FFTNum1=(Cmplx*)malloc(Bytes);
FFTNum2=(Cmplx*)malloc(Bytes);

if ((FFTNum1==NULL) || (FFTNum2==NULL))
   {
    printf("Unable to allocate memory for FFTNum.\n");
    printf("Len=%d Bytes=%d\n",(int)Len,(int)Bytes);
    exit(0);
   }
}

void DeInitFFT(unsigned long int Len)
{
free(FFTNum1);free(FFTNum2);
}