/* fractal.c */ /* ** This file is placed into the public domain by its author, ** Carey Bloodworth (Carey@Bloodworth.org) on July 16, 2001 ** ** This multiplication demo is not designed for high performance. ** It's a tutorial program designed to be used with the information ** on my web site at www.Bloodworth.org */ /* ** This file demonstrates a very basic Fractal (Karatsuba) mul ** ** To compile this using GCC: ** gcc main.c fractal.c -o fractal.exe */ #include#include #include #include typedef short int Short; static Short *FractWork; static int BASE; static int BASE_DIG; /* ** Basic math routines. */ /* ** Sum = Num1 + Num2 ** Just like a regular add, except the carry can ripple on up higher, ** beyond where we end our addition. */ Short RippleAdd(Short *Limit, Short *Sum, Short *Num1, Short *Num2, int Len) { Short temp, Carry; Carry = 0; Num1 += Len; Num2 += Len; Sum += Len; while (Len--) { temp = (Short)(*(--Num1) + *(--Num2) + Carry); *(--Sum) = (Short)(temp % BASE); Carry = (Short)(temp / BASE); } while ((Sum > Limit) && (Carry != 0)) { temp = (Short)(*(--Sum) + Carry); *Sum = (Short)(temp % BASE); Carry = (Short)(temp / BASE); } return Carry; } /* ** Sum = Num1 + Num2 */ Short Add(Short *Sum, Short *Num1, Short *Num2, int Len) { return RippleAdd(Sum, Sum, Num1, Num2, Len); } /* ** Dif = Num1-Num2 ** Just like a regular sub, except the borrow can ripple on up higher, ** beyond where we end our subtraction. */ Short RippleSub(Short *Limit, Short *Dif, Short *Num1, Short *Num2, int Len) { Short temp, Borrow; Borrow = 0; Num1 += Len; Num2 += Len; Dif += Len; while (Len--) { temp = (Short)(*(--Num1) - *(--Num2) - Borrow); Borrow = 0; if (temp < 0) { Borrow = 1; temp = (Short)(temp + BASE); } *(--Dif) = temp; } while ((Dif > Limit) && (Borrow != 0)) { temp = (Short)(*(--Dif) - Borrow); Borrow = 0; if (temp < 0) { Borrow = 1; temp = (Short)(temp +BASE); } *Dif = temp; } return Borrow; } /* ** Dif = Num1-Num2 */ Short Sub(Short *Dif, Short *Num1, Short *Num2, int Len) { return RippleSub(Dif, Dif, Num1, Num2, Len); } /* ** Num = 0 - Num */ void Negate(Short *Num, int Len) { int x; Short d, Borrow; Borrow = 0; for (x = Len - 1; x >= 0; x--) { d = (Short)(0 - Num[x] - Borrow); Borrow = 0; if (d < 0) { Borrow = 1; d = (Short)(d + BASE); } Num[x] = d; } } void SlowMul(Short *Prod, Short *Num1, Short *Num2, int Len) { long int p; int Ndx1, Ndx2, NdxP; for (NdxP = 0; NdxP < Len * 2; NdxP++) Prod[NdxP] = 0; for (Ndx2 = Len - 1; Ndx2 >= 0; Ndx2--) { if (Num2[Ndx2] == 0) continue; NdxP = Ndx2 + Len; p = 0; for (Ndx1 = Len - 1; Ndx1 >= 0; Ndx1--) { p = (long int) Num1[Ndx1] * (long int) Num2[Ndx2] + p + (long int) Prod[NdxP]; Prod[NdxP] = (Short) (p % BASE); p = p / BASE; NdxP--; } while ((p) && (NdxP >= 0)) { p += (long int) Prod[NdxP]; Prod[NdxP] = (Short) (p % BASE); p = p / BASE; NdxP--; } } } #if 0 /* ** This is based on the simple Fractal / Divide and Conquer ** O(n^1.585) method. ** ** It's fairly simple. a*b is: a1b1(B^2+B)+(a1-a2)(b2-b1)B+a2b2(B+1) ** ** The simple formula above really only works when the length of the ** numbers is a power of two. The implementation below can deal ** with numbers of any length, as long as both 'a' and 'b' are the ** same length. Doing different lengths would have complicated it ** more, and wasn't needed anyway. ** ** For lengths of powers of two, you need 4*SIZE storage. For other ** sizes, you need 5*SIZE. This is guaranteed enough by the nature ** of the formula, and the way I do the storage. ** ** Also, the program attempts to divide the numbers so that at ** least one of them will be a power of two. This lets the FFT ** handle at least this part efficiently. Overall, it's better to ** calculate digits of pi that are powers of two. */ void FractalMul(Short *prod, Short *a, Short *b, Indexer Len) { int SignA, SignB; Short *offset; Short *WorkArea; Short *a1, *a2, *b1, *b2; Indexer x, HLen1, HLen2, OddLen; /* ** HLen1 is the length of the first sub-part of the numbers ** HLen2 is the length of the second part. HLen1 is always ** the longest if the length isn't even. HLen stands for ** 'Half Length'. ** OddLen keeps track of whether it was an even length. ** ** The a1,a2,b1,b2 are used just so you can relate to the ** formula given above. */ /* ** Figure out how to divide the number. Preferably, so that ** at least one of the numbers will be a power of two. ** We also don't want the second number to be either 0 or 1. */ HLen1 = Len; /* ** Make it a power of two by stripping off all ** but the high bit. */ while (!IsPow2(HLen1)) HLen1 ^= (HLen1 & -HLen1); if (HLen1 == Len) HLen1 /= 2; else if (HLen1 == Len - 1) HLen1 = Len - HLen1 / 2; HLen2 = Len - HLen1; OddLen = HLen1 - HLen2; WorkArea = prod + 2 * Len; a1 = a; a2 = a1 + HLen1; b1 = b; b2 = b1 + HLen1; /* ** Now do the (b2-b1) part of the formula. */ for (x = 0; x < OddLen; x++) prod[x] = 0; Copy(prod + OddLen, b2, HLen2); SignB = Sub(prod, prod, b1, HLen1); if (SignB) Negate(prod, HLen1); /* ** Now do the (a1-a2) part of the formula. ** I don't like damaging the number, even though we'll ** fix it later. It does however reduce the working space ** you need. */ SignA = RippleSub(a1, a1 + OddLen, a1 + OddLen, a2, HLen2); if (SignA) Negate(a1, HLen1); /* ** Now multiply those two differences (a1-a2)*(b2-b1) ** and then, depending on the signs of the two subtractions ** either add or subtract it into our answer, which so ** far is still zero. By doing the middle part of the ** equation first, I can save some memory. */ Mul(WorkArea, prod, a1, HLen1); for (x = 0; x < 2 * Len; x++) prod[x] = 0; offset = prod + HLen1 - OddLen; if (SignA == SignB) RippleAdd(prod, offset, offset, WorkArea, HLen1 * 2); else RippleSub(prod, offset, offset, WorkArea, HLen1 * 2); /* ** Now we have to restore the 'a1' we damaged. */ if (SignA) Negate(a1, HLen1); RippleAdd(a1, a1 + OddLen, a1 + OddLen, a2, HLen2); /* ** Now we do the first part of the formula, a1*b1 ** We then add that product into our answer. ** We have to add it in two places, remember? */ Mul(WorkArea, a1, b1, HLen1); offset = prod + HLen2; RippleAdd(prod, offset, offset, WorkArea, HLen1 * 2); Add(prod, prod, WorkArea, HLen1 * 2); /* ** Now we do the last part of the formula, a2*b2 ** We then add that product into our answer. ** We have to add it in two places, remember? */ Mul(WorkArea, a2, b2, HLen2); offset = prod + HLen1 + OddLen; RippleAdd(prod, offset, offset, WorkArea, HLen2 * 2); offset = prod + HLen1 * 2; RippleAdd(prod, offset, offset, WorkArea, HLen2 * 2); } #endif /* ** This is based on the simple Fractal / Divide and Conquer / Karatsuba ** O(n^1.585) method. ** ** It's fairly simple. a*b is: a1b1(B^2+B)+(a1-a2)(b2-b1)B+a2b2(B+1) ** ** You need 4*SIZE storage for the product and the working space. ** ** This version can only handle sizes that are a power of two. ** ** It can be simplified if we are only squaring. */ void FractalMul(Short *Prod, Short *Num1, Short *Num2, int Len) {int HLen,x; int Sign1, Sign2; Short *offset; Short *Work=Prod+2*Len; void Mul(Short *Prod, Short *Num1, Short *Num2, int Len); HLen = Len/2; if (Num1==Num2) /* Squaring? It can be simplified. */ { Sign1 = Sub(Prod, Num1, Num1+HLen, HLen); if (Sign1) Negate(Prod, HLen); Mul(Work,Prod,Prod,HLen); for (x=0;x 4) { printf("Error: The fft is slightly hardwired for <= 4 digits in the base.\n"); exit(0); } FractWork=(Short*)malloc(Bytes); if (FractWork==NULL) { printf("Unable to allocate memory for FractWork.\n"); printf("Len=%d Bytes=%d\n",(int)Len,(int)Bytes); exit(0); } } void DeInitFFT(unsigned long int Len) { free(FractWork); }