/*
     balanced.c
*/
/*
** This file is placed into the public domain by its author,
** Carey Bloodworth (Carey@Bloodworth.org) on July 16, 2001
**
** This multiplication demo is not designed for high performance.
** It's a tutorial program designed to be used with the information
** on my web site at www.Bloodworth.org
*/
/*
** This file demonstrates a typical Real<->Complex wrapper based
** FFT multiplication using 'balanced' data.
**
** To compile this using GCC:
** gcc main.c balanced.c -o balanced.exe
*/
#include 
#include 
#include 
#include 
#include 

typedef short int Short;

typedef struct {double r,i;} Cmplx;

#define CmplxAdd(_S,_A,_B) {_S.r=_A.r+_B.r;_S.i=_A.i+_B.i;}
#define CmplxConj(_V) {_V.i=-_V.i;}
#define CmplxConj2(_A,_B) {_A.r=_B.r;_A.i=-_B.i;}
#define CmplxDivV(_A,_v) {_A.r/=(_v);_A.i/=(_v);}
#define CmplxImag(_A) _A.i
#define CmplxMul(_P,_A,_B)         \
  {Cmplx _R;                       \
   _R.r=(_A.r*_B.r) - (_A.i*_B.i); \
   _R.i=(_A.i*_B.r) + (_A.r*_B.i); \
   _P=_R;                          \
  }
#define CmplxMulV(_A,_v) {_A.r*=(_v);_A.i*=(_v);}
#define CmplxReal(_A) _A.r
#define CmplxSet(_S,_A,_B) {_S.r=_A;_S.i=_B;}
#define CmplxSub(D,_A,_B) {D.r=_A.r-_B.r;D.i=_A.i-_B.i;}


#define CalcFFTLen(_NumLen) ((((_NumLen)*BASE_DIG)*2)/BASE_DIG)
/* NumLen*BaseDig*ZeroPadding/Dig_Per_FFT */
/* The Real Value FFT length, not 'complex' FFT length. */

extern double MaxFFTError;

static double *FFTNum1=NULL, *FFTNum2=NULL;
static double BASE;
static int BASE_DIG;

/* A Konstant.  Compiler might not have this. */
double K_PI_    =3.14159265358979323846L;

/*
** I like to do the trig stuff as macros.
** It lets me seperate that stuff from the regular FFT stuff.
** It also lets me play with different ways to compute the trig.
*/
#if 1
#define TRIG_VARS  Cmplx PRoot,Root;

#define INIT_TRIG(LENGTH,DIR)       \
 PRoot.r=1.0;PRoot.i=0.0;           \
 Root.r=sin(K_PI_/((LENGTH)*2));    \
 Root.r=-2.0*Root.r*Root.r;         \
 Root.i=sin(K_PI_/(LENGTH))*(DIR);

#define NEXT_TRIG_POW         \
 {Cmplx Temp;                 \
  Temp=PRoot;                 \
  CmplxMul(PRoot,PRoot,Root); \
  CmplxAdd(PRoot,PRoot,Temp); \
 }
#endif

#if 0
#define TRIG_VARS                   \
 size_t TLen,TNdx;int TDir;         \
 Cmplx PRoot,Root;

#define INIT_TRIG(LENGTH,DIR)       \
 TNdx=0;TLen=LENGTH;TDir=(DIR);     \
 PRoot.r=1.0;PRoot.i=0.0;           \
 Root.r=sin(K_PI_/((LENGTH)*2.0));  \
 Root.r=-2.0*Root.r*Root.r;         \
 Root.i=sin(K_PI_/(LENGTH))*(DIR);

#define NEXT_TRIG_POW               \
 if (((++TNdx)&7)==0)               \
   {double Angle;                   \
    Angle=(K_PI_*(TNdx))/TLen;      \
    PRoot.r=sin(Angle*0.5);         \
    PRoot.r=1.0-2.0*PRoot.r*PRoot.r;\
    PRoot.i=sin(Angle)*(TDir);      \
   }                                \
 else                               \
   {Cmplx Temp;                     \
    Temp=PRoot;                     \
    CmplxMul(PRoot,PRoot,Root);     \
    CmplxAdd(PRoot,PRoot,Temp);     \
   }
#endif

#if 0
#define TRIG_VARS                   \
 double Angle;                      \
 size_t TLen,TNdx;int TDir;         \
 Cmplx PRoot;

#define INIT_TRIG(LENGTH,DIR)       \

#define NEXT_TRIG_POW               \
 Angle=(K_PI_*(++TNdx))/TLen;       \
 PRoot.r=sin(Angle*0.5);            \
 PRoot.r=1.0-2.0*PRoot.r*PRoot.r;   \
 PRoot.i=sin(Angle)*(TDir);
#endif


int Log2(int Num)
{int x=-1;
if (Num==0) return 0;
while (Num) {x++;Num/=2;}
return x;
}

static int
IsPow2(int Num)
{
  return ((Num & -Num) == Num);
}


/*
** Reorder complex data array by bit reversal rule.
*/
static void
FFTReOrder(Cmplx *Data, int Len)
{int Index,xednI,k;

xednI = 0;
for (Index = 0;Index < Len;Index++)
  {
   if (xednI > Index)
     {Cmplx Temp;
      Temp=Data[xednI];
      Data[xednI]=Data[Index];
      Data[Index]=Temp;
     }
   k=Len/2;
   while ((k <= xednI) && (k >=1)) {xednI-=k;k/=2;}
   xednI+=k;
  }
}

static void
FFT_T(Cmplx *Data, int Len,int Dir)
/* Generic Decimation in Time */
{
  int Step, HalfStep;
  int b;
  TRIG_VARS;

  FFTReOrder(Data, Len);

  Step = 1;
  while (Step < Len)
    {
      Step *= 2;
      HalfStep = Step/2;

      INIT_TRIG(HalfStep,Dir);

      for (b = 0; b < HalfStep; b++)
        {int L,R;
         for (L=b;L < Len; L+=Step)
           {Cmplx TRight,TLeft;
            R=L+HalfStep;
            TLeft=Data[L];TRight=Data[R];
            CmplxMul(TRight,TRight,PRoot);
            CmplxAdd(Data[L],TLeft,TRight);
            CmplxSub(Data[R],TLeft,TRight);
           }
         NEXT_TRIG_POW;
        }
    }
}

/*
** This is the routine that lets us use a 'Complex' FFT with our
** Real only data.
*/
static void
RealFFT(double *ddata, int Len, int Dir)
{
  int i, j;
  Cmplx *Data=(Cmplx*)ddata;
  TRIG_VARS;

  Len /= 2; /* input as 'double'.  Treat as Cmplx */

  if (Dir > 0) FFT_T(Data,Len,1);

  INIT_TRIG(Len,Dir);
  NEXT_TRIG_POW;

  for (i = 1, j = Len - i; i < Len/2; i++, j--)
    {Cmplx p1,p2,t;
   /* Seperate the two points from the jumbled points */
   /* There are *many* different ways to code this. */
   /* I think this is one of the clearer methods. */

     CmplxConj2(t,Data[j]);
     CmplxAdd(p1,Data[i],t);
     CmplxSub(p2,Data[i],t);CmplxMul(p2,p2,PRoot);
     /* Tricky.  Swap and change sign. */
     CmplxSet(t,-Dir*p2.i,Dir*p2.r);

     CmplxAdd(Data[i],p1,t);
     CmplxSub(Data[j],p1,t);CmplxConj(Data[j]);

     /* Normalize */
     CmplxDivV(Data[i],2.0);CmplxDivV(Data[j],2.0);

     NEXT_TRIG_POW;
    }

/* Index 0 has to be done special. */
  {double r,i;
   r=Data[0].r;i=Data[0].i;
   CmplxSet(Data[0],r+i,r-i);
  }

  if (Dir < 0)
    {
      CmplxDivV(Data[0],2.0);
      FFT_T(Data,Len,-1);
    }
}

void BalanceData(double *FFTNum,int Len)
/*
** Make our data 'balanced'.  Ranging from -Base/2 ... +Base/2
*/
{double Carry=0;int x;

 for (x = 0; x < Len; x++)
   {
    FFTNum[x]+=Carry;
    Carry=0;
    if (FFTNum[x] >= (BASE/2))
      {
       Carry=1;
       FFTNum[x]-=BASE;
      }
   }
 if (Carry > 0)
   FFTNum[0]-=Carry;
}

void UnBalanceData(double *FFTNum,int Len)
/*
** Unbalance the data, and release the carries.
*/
{double Borrow, Dig;int x;
 double RawPyramid,Pyramid,PyramidError;

Borrow=0;
for (x=0;x MaxFFTError) MaxFFTError=PyramidError;

   /* add in our Carry / Borrow (depending on sign) */
   Dig=Pyramid+Borrow;
   Borrow=0;

   /* Release our carries */
   if (Dig >= BASE)
     {
      Borrow=floor(Dig/BASE);
      Dig=Dig-Borrow*BASE;
     }
   if (Dig < 0)
     {
      Borrow=floor(Dig/BASE);
      Dig=Dig-Borrow*BASE;
      if (Dig < 0) {Dig+=BASE;Borrow--;}
     }

   FFTNum[x]=Dig;
  }

/* Propogate any Carry/Borrow that might have occured */
x=0;
while (Borrow)
  {
   FFTNum[x]+=Borrow;
   Borrow=0;
   if (FFTNum[x] >= BASE) {FFTNum[x]-=BASE;Borrow=1;}
   x++;
   if (x >= Len) break;
  }
}

void
FFTMul(Short * Prod, Short * Num1, Short * Num2,int Len)
/*
** Do a plain FFT based multiply.
** Data is big-endian
**
** There is a bit of 'type' mismatch between our data, which
** is real, the output of the FFT, which is complex.  It's just
** 'type' stuff and not important, but it does mean we have to
** access the data both ways.
*/
{
  int x, FFTLen = CalcFFTLen(Len);
  double Scale;
  Cmplx *CF1=(Cmplx*)FFTNum1;
  Cmplx *CF2=(Cmplx*)FFTNum2;

/*
** This is a radix-2 FFT, so the length has to be a power of two.
*/
  if (!IsPow2(Len)) {printf("FFT length is not a power of two.\n");exit(0);}

/* Put our big endian data into FFT in little endian format & zero pad */
  for (x = 0; x < FFTLen; x++) FFTNum1[x] = 0.0;
  for (x = 0; x < Len; x++)    FFTNum1[x] = Num1[Len - x - 1];

  BalanceData(FFTNum1,FFTLen);

  RealFFT(FFTNum1, FFTLen, 1);

/*
** If we are squaring a number, we can save the cost
** of a FFT.
*/
  if (Num1 == Num2)
    {
      /*
      ** Now do the convolution
      */
      FFTNum1[0] = FFTNum1[0] * FFTNum1[0];
      FFTNum1[1] = FFTNum1[1] * FFTNum1[1];
      for (x = 1; x < FFTLen/2; x++) /* /2 treating as cmplx */
         CmplxMul(CF1[x],CF1[x],CF1[x]);
    }
  else
    {
      for (x = 0; x < FFTLen; x++) FFTNum2[x] = 0.0;
      for (x = 0; x < Len; x++)    FFTNum2[x] = Num2[Len - x - 1];
      BalanceData(FFTNum2,FFTLen);
      RealFFT(FFTNum2, FFTLen, 1);

      /*
      ** Now do the convolution
      */
      FFTNum1[0] = FFTNum1[0] * FFTNum2[0];
      FFTNum1[1] = FFTNum1[1] * FFTNum2[1];
      for (x = 1; x < FFTLen/2; x++) /* /2 treating as cmplx */
         CmplxMul(CF1[x],CF1[x],CF2[x]);
    }

/*
** Now do an Inverse FFT
*/
  RealFFT(FFTNum1, FFTLen, -1);

/*
** Scale our answer.
*/
  Scale=1.0/(FFTLen/2);
  for (x=0;x 0; x--)
     Prod[x-1] = FFTNum1[FFTLen-x];
}

void
InitFFT(unsigned long int Len,int Base,int BaseDig)
{int Bytes;

BASE=Base;
BASE_DIG=BaseDig;

Bytes=sizeof(double)*CalcFFTLen(Len);

if (BaseDig > 4)
  {
   printf("Error:  The fft is slightly hardwired for <= 4 digits in the base.\n");
   exit(0);
  }

FFTNum1=(double*)malloc(Bytes);
FFTNum2=(double*)malloc(Bytes);

if ((FFTNum1==NULL) || (FFTNum2==NULL))
   {
    printf("Unable to allocate memory for FFTNum.\n");
    printf("Len=%d Bytes=%d\n",(int)Len,(int)Bytes);
    exit(0);
   }
}

void DeInitFFT(unsigned long int Len)
{
free(FFTNum1);free(FFTNum2);
}