/*
fractal.c
*/
/*
** This file is placed into the public domain by its author,
** Carey Bloodworth (Carey@Bloodworth.org) on July 16, 2001
**
** This multiplication demo is not designed for high performance.
** It's a tutorial program designed to be used with the information
** on my web site at www.Bloodworth.org
*/
/*
** This file demonstrates a very basic Fractal (Karatsuba) mul
**
** To compile this using GCC:
** gcc main.c fractal.c -o fractal.exe
*/
#include
#include
#include
#include
typedef short int Short;
static Short *FractWork;
static int BASE;
static int BASE_DIG;
/*
** Basic math routines.
*/
/*
** Sum = Num1 + Num2
** Just like a regular add, except the carry can ripple on up higher,
** beyond where we end our addition.
*/
Short
RippleAdd(Short *Limit, Short *Sum, Short *Num1, Short *Num2, int Len)
{
Short temp, Carry;
Carry = 0;
Num1 += Len;
Num2 += Len;
Sum += Len;
while (Len--)
{
temp = (Short)(*(--Num1) + *(--Num2) + Carry);
*(--Sum) = (Short)(temp % BASE);
Carry = (Short)(temp / BASE);
}
while ((Sum > Limit) && (Carry != 0))
{
temp = (Short)(*(--Sum) + Carry);
*Sum = (Short)(temp % BASE);
Carry = (Short)(temp / BASE);
}
return Carry;
}
/*
** Sum = Num1 + Num2
*/
Short
Add(Short *Sum, Short *Num1, Short *Num2, int Len)
{
return RippleAdd(Sum, Sum, Num1, Num2, Len);
}
/*
** Dif = Num1-Num2
** Just like a regular sub, except the borrow can ripple on up higher,
** beyond where we end our subtraction.
*/
Short
RippleSub(Short *Limit, Short *Dif, Short *Num1, Short *Num2, int Len)
{
Short temp, Borrow;
Borrow = 0;
Num1 += Len;
Num2 += Len;
Dif += Len;
while (Len--)
{
temp = (Short)(*(--Num1) - *(--Num2) - Borrow);
Borrow = 0;
if (temp < 0)
{
Borrow = 1;
temp = (Short)(temp + BASE);
}
*(--Dif) = temp;
}
while ((Dif > Limit) && (Borrow != 0))
{
temp = (Short)(*(--Dif) - Borrow);
Borrow = 0;
if (temp < 0)
{
Borrow = 1;
temp = (Short)(temp +BASE);
}
*Dif = temp;
}
return Borrow;
}
/*
** Dif = Num1-Num2
*/
Short
Sub(Short *Dif, Short *Num1, Short *Num2, int Len)
{
return RippleSub(Dif, Dif, Num1, Num2, Len);
}
/*
** Num = 0 - Num
*/
void
Negate(Short *Num, int Len)
{
int x;
Short d, Borrow;
Borrow = 0;
for (x = Len - 1; x >= 0; x--)
{
d = (Short)(0 - Num[x] - Borrow);
Borrow = 0;
if (d < 0)
{
Borrow = 1;
d = (Short)(d + BASE);
}
Num[x] = d;
}
}
void
SlowMul(Short *Prod, Short *Num1, Short *Num2, int Len)
{
long int p;
int Ndx1, Ndx2, NdxP;
for (NdxP = 0; NdxP < Len * 2; NdxP++)
Prod[NdxP] = 0;
for (Ndx2 = Len - 1; Ndx2 >= 0; Ndx2--)
{
if (Num2[Ndx2] == 0)
continue;
NdxP = Ndx2 + Len;
p = 0;
for (Ndx1 = Len - 1; Ndx1 >= 0; Ndx1--)
{
p = (long int) Num1[Ndx1] * (long int) Num2[Ndx2] +
p + (long int) Prod[NdxP];
Prod[NdxP] = (Short) (p % BASE);
p = p / BASE;
NdxP--;
}
while ((p) && (NdxP >= 0))
{
p += (long int) Prod[NdxP];
Prod[NdxP] = (Short) (p % BASE);
p = p / BASE;
NdxP--;
}
}
}
#if 0
/*
** This is based on the simple Fractal / Divide and Conquer
** O(n^1.585) method.
**
** It's fairly simple. a*b is: a1b1(B^2+B)+(a1-a2)(b2-b1)B+a2b2(B+1)
**
** The simple formula above really only works when the length of the
** numbers is a power of two. The implementation below can deal
** with numbers of any length, as long as both 'a' and 'b' are the
** same length. Doing different lengths would have complicated it
** more, and wasn't needed anyway.
**
** For lengths of powers of two, you need 4*SIZE storage. For other
** sizes, you need 5*SIZE. This is guaranteed enough by the nature
** of the formula, and the way I do the storage.
**
** Also, the program attempts to divide the numbers so that at
** least one of them will be a power of two. This lets the FFT
** handle at least this part efficiently. Overall, it's better to
** calculate digits of pi that are powers of two.
*/
void
FractalMul(Short *prod, Short *a, Short *b, Indexer Len)
{
int SignA, SignB;
Short *offset;
Short *WorkArea;
Short *a1, *a2, *b1, *b2;
Indexer x, HLen1, HLen2, OddLen;
/*
** HLen1 is the length of the first sub-part of the numbers
** HLen2 is the length of the second part. HLen1 is always
** the longest if the length isn't even. HLen stands for
** 'Half Length'.
** OddLen keeps track of whether it was an even length.
**
** The a1,a2,b1,b2 are used just so you can relate to the
** formula given above.
*/
/*
** Figure out how to divide the number. Preferably, so that
** at least one of the numbers will be a power of two.
** We also don't want the second number to be either 0 or 1.
*/
HLen1 = Len;
/*
** Make it a power of two by stripping off all
** but the high bit.
*/
while (!IsPow2(HLen1))
HLen1 ^= (HLen1 & -HLen1);
if (HLen1 == Len) HLen1 /= 2;
else if (HLen1 == Len - 1) HLen1 = Len - HLen1 / 2;
HLen2 = Len - HLen1;
OddLen = HLen1 - HLen2;
WorkArea = prod + 2 * Len;
a1 = a;
a2 = a1 + HLen1;
b1 = b;
b2 = b1 + HLen1;
/*
** Now do the (b2-b1) part of the formula.
*/
for (x = 0; x < OddLen; x++) prod[x] = 0;
Copy(prod + OddLen, b2, HLen2);
SignB = Sub(prod, prod, b1, HLen1);
if (SignB) Negate(prod, HLen1);
/*
** Now do the (a1-a2) part of the formula.
** I don't like damaging the number, even though we'll
** fix it later. It does however reduce the working space
** you need.
*/
SignA = RippleSub(a1, a1 + OddLen, a1 + OddLen, a2, HLen2);
if (SignA) Negate(a1, HLen1);
/*
** Now multiply those two differences (a1-a2)*(b2-b1)
** and then, depending on the signs of the two subtractions
** either add or subtract it into our answer, which so
** far is still zero. By doing the middle part of the
** equation first, I can save some memory.
*/
Mul(WorkArea, prod, a1, HLen1);
for (x = 0; x < 2 * Len; x++) prod[x] = 0;
offset = prod + HLen1 - OddLen;
if (SignA == SignB) RippleAdd(prod, offset, offset, WorkArea, HLen1 * 2);
else RippleSub(prod, offset, offset, WorkArea, HLen1 * 2);
/*
** Now we have to restore the 'a1' we damaged.
*/
if (SignA) Negate(a1, HLen1);
RippleAdd(a1, a1 + OddLen, a1 + OddLen, a2, HLen2);
/*
** Now we do the first part of the formula, a1*b1
** We then add that product into our answer.
** We have to add it in two places, remember?
*/
Mul(WorkArea, a1, b1, HLen1);
offset = prod + HLen2;
RippleAdd(prod, offset, offset, WorkArea, HLen1 * 2);
Add(prod, prod, WorkArea, HLen1 * 2);
/*
** Now we do the last part of the formula, a2*b2
** We then add that product into our answer.
** We have to add it in two places, remember?
*/
Mul(WorkArea, a2, b2, HLen2);
offset = prod + HLen1 + OddLen;
RippleAdd(prod, offset, offset, WorkArea, HLen2 * 2);
offset = prod + HLen1 * 2;
RippleAdd(prod, offset, offset, WorkArea, HLen2 * 2);
}
#endif
/*
** This is based on the simple Fractal / Divide and Conquer / Karatsuba
** O(n^1.585) method.
**
** It's fairly simple. a*b is: a1b1(B^2+B)+(a1-a2)(b2-b1)B+a2b2(B+1)
**
** You need 4*SIZE storage for the product and the working space.
**
** This version can only handle sizes that are a power of two.
**
** It can be simplified if we are only squaring.
*/
void
FractalMul(Short *Prod, Short *Num1, Short *Num2, int Len)
{int HLen,x;
int Sign1, Sign2;
Short *offset;
Short *Work=Prod+2*Len;
void Mul(Short *Prod, Short *Num1, Short *Num2, int Len);
HLen = Len/2;
if (Num1==Num2) /* Squaring? It can be simplified. */
{
Sign1 = Sub(Prod, Num1, Num1+HLen, HLen);
if (Sign1) Negate(Prod, HLen);
Mul(Work,Prod,Prod,HLen);
for (x=0;x 4)
{
printf("Error: The fft is slightly hardwired for <= 4 digits in the base.\n");
exit(0);
}
FractWork=(Short*)malloc(Bytes);
if (FractWork==NULL)
{
printf("Unable to allocate memory for FractWork.\n");
printf("Len=%d Bytes=%d\n",(int)Len,(int)Bytes);
exit(0);
}
}
void DeInitFFT(unsigned long int Len)
{
free(FractWork);
}