License Public Domain
Lines 585
Keywords
math (4) pi (1)
Permissions
Viewable by Everyone
Editable by All Siafoo Users
Hide
Solve a problem – Filter by language, license, keyword, owner, or search text to find code & info fast. Join Siafoo Now or Learn More

Calculate PI to 17 Million Digits Atom Feed 1

In Brief 17 million digits in O(n^1.7), using the Salamin/Brent/Gauss arithmetic/geometric mean pi formula.
# 's
  1/*
2** This method implements the Salamin / Brent / Gauss Arithmetic-
3** Geometric Mean pi formula.
4**
5** Let A[0] = 1, B[0] = 1/Sqrt(2)
6**
7** Then iterate from 1 to 'n'.
8** A[n] = (A[n-1] + B[n-1])/2
9** B[n] = Sqrt(A[n-1]*B[n-1])
10** C[n]^2 = A[n]^2 - B[n]^2 (or) C[n] = (A[n-1]-B[n-1])/2
11** n
12** PI[n] = 4A[n+1]^2 / (1-(Sum (2^(j+1))*C[j]^2))
13** j = 1
14**
15** There is an actual error calculation, but it comes out to slightly
16** more than double on each iteration. I think it results in about 17
17** million correct digits, instead of 16 million if it actually
18** doubled. PI16 generates 178,000 digits. PI19 to over a million.
19** PI22 is 10 million, and PI26 to 200 million.
20**
21** For what little it's worth, this program is placed into the public
22** domain by its author, Carey Bloodworth, on September 21, 1996.
23**
24** The math routines in this program are not general purpose routines.
25** They have been optimized and written for this specific use. The
26** concepts are of course portable, but not the implementation.
27**
28** The program run time is about O(n^1.7). That's fairly good growth,
29** compared to things such as the classic arctangents. But not as good
30** as it should be, if I used a FFT based multiplication. Also, the 'O'
31** is fairly large. In fact, I'd guess that this program could compute
32** one million digits of pi in about the same time as my previously
33** posted arctangent method, in spite of this one having less than n^2
34** growth.
35**
36** The program has not been cleaned up. It's written rather crudely
37** and dirty. The RSqrt() in particular is rather dirty, having gone
38** through numerous changes and attempts at speeding it up.
39** But I'm not planning on doing any more with it. The growth isn't as
40** low as I'd hoped, and until I find a faster multiplication, the
41** method isn't any better than simpler arctangents.
42**
43** I currently use a base of 10,000 simply because it made debugging
44** easier. A base of 65,536 and modifying the FastMul() to handle sizes
45** that aren't powers of two would allow a bit better efficiency.
46*/
47
48#include <stdio.h>
49#include <stdlib.h>
50#include <math.h>
51#include <time.h>
52#include "snipmath.h"
53
54#ifdef __TURBOC__
55typedef short int Indexer;
56#else
57typedef long int Indexer;
58#endif
59
60typedef short int Short;
61typedef long int Long;
62
63Short *a, *b, *c, *Work1, *MulWork, *Work2, *Work3;
64int SIZE;
65
66/*
67** Work1 is explicitly used in Reciprocal() and RSqrt()
68** Work1 is implicitly used in Divide() and Sqrt()
69** Work2 is explicitly used in Divide() and Sqrt()
70** Work3 is used only in the AGM and holds the previous reciprocal
71** square root, allowing us to save some time in RSqrt()
72*/
73
74
75void Copy(Short *a, Short *b, Indexer Len)
76{
77 while (Len--) a[Len] = b[Len];
78}
79
80/*
81** This rounds and copies a 2x mul result into a normal result
82** Our number format will never have more than one unit of integer,
83** and after a mul, we have two, so we need to fix that.
84*/
85
86void Round2x(Short *a, Short *b, Indexer Len)
87{
88 Indexer x;
89 Short carry;
90
91 carry = 0;
92 if (b[Len+1] >= 5000)
93 carry = 1;
94 for (x = Len; x > 0; x--)
95 {
96 carry += b[x];
97 a[x-1] = carry % 10000;
98 carry /= 10000;
99 }
100}
101
102void DivBy2(Short *n, Indexer Len)
103{
104 Indexer x;
105 Long temp;
106
107 temp = 0;
108 for (x = 0; x < Len; x++)
109 {
110 temp = (Long)n[x]+temp*10000;
111 n[x] = (Short)(temp/2);
112 temp = temp%2;
113 }
114}
115
116void DoCarries(Short *limit, Short *cur, Short carry)
117{
118 Long temp;
119
120 while ((cur >= limit) && (carry != 0))
121 {
122 temp = *cur+carry;
123 carry = 0;
124 if (temp >= 10000)
125 {
126 carry = 1;
127 temp -= 10000;
128 }
129 *cur = temp;
130 --cur;
131 }
132}
133
134void DoBorrows(Short *limit, Short *cur, Short borrow)
135{
136 Long temp;
137 while ((cur >= limit) && (borrow != 0))
138 {
139 temp = *cur-borrow;
140 borrow = 0;
141 if (temp < 0)
142 {
143 borrow = 1;
144 temp += 10000;
145 }
146 *cur = temp;
147 --cur;
148 };
149}
150
151void PrintShort2(char *str, Short *num, Indexer Len)
152{
153 Indexer x;
154
155 printf("%s ", str);
156 printf("%u.", num[0]);
157 for (x = 1; x < Len; x++)
158 printf("%04u", num[x]);
159 printf("\n");
160}
161
162void PrintShort(char *str, Short *num, Indexer Len)
163{
164 Indexer x;
165 int printed = 0;
166
167 printf("%s ", str);
168
169 printf("%u.\n", num[0]);
170
171 for (x = 1; x < Len; x++)
172 {
173 printf("%02d", num[x]/100);
174 printed += 2;
175 if ((printed % 1000) == 0)
176 {
177 printf("\n\n\n\n");
178 printed = 0;
179 }
180 else if ((printed % 50) == 0)
181 {
182 printf("\n");
183 }
184 else if ((printed % 10) == 0)
185 {
186 printf(" ");
187 }
188 printf("%02d", num[x] % 100);
189 printed += 2;
190 if ((printed % 1000) == 0)
191 {
192 printf("\n\n\n\n");
193 printed = 0;
194 }
195 else if ((printed % 50) == 0)
196 {
197 printf("\n");
198 }
199 else if ((printed % 10) == 0)
200 {
201 printf(" ");
202 }
203 }
204 printf("\n");
205
206}
207
208/* sum = a + b */
209
210Short Add(Short *sum, Short *a, Short *b, Indexer Len)
211{
212 Long s, carry;
213 Indexer x;
214
215 carry = 0;
216 for (x = Len - 1; x >= 0; x--)
217 {
218 s = (Long)a[x] + (Long)b[x] + carry;
219 sum[x] = (Short)(s%10000);
220 carry = s/10000;
221 }
222 return carry;
223}
224
225/* dif = a-b */
226
227Short Sub(Short *dif, Short *a, Short *b, Indexer Len)
228{
229 Long d, borrow;
230 Indexer x;
231
232 borrow = 0;
233 for (x = Len - 1; x >= 0; x--)
234 {
235 d = (Long)a[x] - (Long)b[x] - borrow;
236 borrow = 0;
237 if (d < 0)
238 {
239 borrow = 1;
240 d += 10000;
241 }
242 dif[x] = (Short)d;
243 }
244 return borrow;
245}
246
247void Negate(Short *num, Indexer Len)
248{
249 Indexer x;Long d, borrow;
250
251 borrow = 0;
252 for (x = Len - 1; x >= 0; x--)
253 {
254 d = 0 - num[x] - borrow;
255 borrow = 0;
256 if (d < 0)
257 {
258 borrow = 1;
259 d += 10000;
260 }
261 num[x] = (Short)d;
262 }
263}
264
265/* prod = a*b. prod should be twice normal length */
266
267void Mul(Short *prod, Short *a, Short *b, Indexer Len)
268{
269 Long p;
270 Indexer ia, ib, ip;
271
272 if ((prod == a) || (b == prod))
273 {
274 printf("MUL product can't be one of the other arguments\n");
275 exit(1);
276 }
277
278 for (ip = 0; ip < Len * 2; ip++)
279 prod[ip] = 0;
280 for (ib = Len - 1; ib >= 0; ib--)
281 {
282 if (b[ib] == 0)
283 continue;
284 ip = ib + Len;
285 p = 0;
286 for (ia = Len - 1; ia >= 0; ia--)
287 {
288 p = (Long)a[ia]*(Long)b[ib] + p + prod[ip];
289 prod[ip] = p%10000;
290 p = p/10000;
291 ip--;
292 }
293 while ((p) && (ip >= 0))
294 {
295 p += prod[ip];
296 prod[ip] = p%10000;
297 p = p/10000;
298 ip--;
299 }
300 }
301}
302
303/*
304** This is based on the simple O(n^1.585) method, although my
305** growth seems to be closer to O(n^1.7)
306**
307** It's fairly simple. a*b is: a2b2(B^2+B)+(a2-a1)(b1-b2)B+a1b1(B+1)
308**
309** For a = 4711 and b = 6397, a2 = 47 a1 = 11 b2 = 63 b1 = 97 Base = 100
310**
311** If we did that the normal way, we'd do
312** a2b2 = 47*63 = 2961
313** a2b1 = 47*97 = 4559
314** a1b2 = 11*63 = 693
315** a1b1 = 11*97 = 1067
316**
317** 29 61
318** 45 59
319** 6 93
320** 10 67
321** -----------
322** 30 13 62 67
323**
324** Or, we'd need N*N multiplications.
325**
326** With the 'fractal' method, we compute:
327** a2b2 = 47*63 = 2961
328** (a2-b1)(b1-b2) = (47-11)(97-63) = 36*34 = 1224
329** a1b1 = 11*97 = 1067
330**
331** 29 61
332** 29 61
333** 12 24
334** 10 67
335** 10 67
336** -----------
337** 30 13 62 67
338**
339** We need only 3 multiplications, plus a few additions. And of course,
340** additions are a lot simpler and faster than multiplications, so we
341** end up ahead.
342**
343** The way it is written requires that the size to be a power of two.
344** That's why the program itself requires the size to be a power of two.
345** There is no actual requirement in the method, it's just how I did it
346** because I would be working close to powers of two anyway, and it was
347** easier.
348*/
349
350void FastMul(Short *prod, Short *a, Short *b, Indexer Len)
351{
352 Indexer x, HLen;
353 int SignA, SignB;
354 Short *offset;
355 Short *NextProd;
356
357 /*
358 ** This is the threshold where it's faster to do it the ordinary way
359 ** On my computer, it's 16. Yours may be different.
360 */
361
362 if (Len <= 16 )
363 {
364 Mul(prod, a, b, Len);
365 return;
366 }
367
368 HLen = Len/2;
369 NextProd = prod + 2*Len;
370
371 SignA = Sub(prod, a, a + HLen, HLen);
372 if (SignA)
373 {
374 SignA = 1;
375 Negate(prod, HLen);
376 }
377 SignB = Sub(prod + HLen, b + HLen, b, HLen);
378 if (SignB)
379 {
380 SignB = 1;
381 Negate(prod + HLen, HLen);
382 }
383
384 FastMul(NextProd, prod, prod + HLen, HLen);
385 for (x = 0; x < 2 * Len; x++)
386 prod[x] = 0;
387 offset = prod + HLen;
388 if (SignA == SignB)
389 DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
390 else DoBorrows(prod, offset - 1, Sub(offset, offset, NextProd, Len));
391
392 FastMul(NextProd, a, b, HLen);
393 offset = prod + HLen;
394 DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
395 Add(prod, prod, NextProd, Len);
396
397 FastMul(NextProd, a + HLen, b + HLen, HLen);
398 offset = prod + HLen;
399 DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
400 offset = prod + Len;
401 DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
402}
403
404/*
405** Compute the reciprocal of 'a'.
406** x[k+1] = x[k]*(2-a*x[k])
407*/
408
409void Reciprocal(Short *r, Short *n, Indexer Len)
410{
411 Indexer x, SubLen;
412 int iter;
413 double t;
414
415 /* Estimate our first reciprocal */
416
417 for (x = 0; x < Len; x++)
418 r[x] = 0;
419 t = n[0] + n[1]*1.0e-4 + n[2]*1.0e-8;
420 if (t == 0.0)
421 t = 1;
422 t = 1.0/t;
423 r[0] = t;
424 t = (t - floor(t))*10000.0;
425 r[1] = t;
426 t = (t - floor(t))*10000.0;
427 r[2] = t;
428
429 iter = log(SIZE)/log(2.0) + 1;
430 SubLen = 1;
431 while (iter--)
432 {
433 SubLen *= 2;
434 if (SubLen > SIZE)
435 SubLen = SIZE;
436 FastMul(MulWork, n, r, SubLen);
437 Round2x(Work1, MulWork, SubLen);
438
439 MulWork[0] = 2;
440 for (x = 1; x < Len; x++)
441 MulWork[x] = 0;
442 Sub(Work1, MulWork, Work1, SubLen);
443
444 FastMul(MulWork, r, Work1, SubLen);
445 Round2x(r, MulWork, SubLen);
446 }
447}
448
449/*
450** Computes the reciprocal of the square root of 'a'
451** y[k+1] = y[k]*(3-a*y[k]^2)/2
452**
453**
454** We can save quite a bit of time by using part of our previous
455** results! Since the number is converging onto a specific value,
456** at least part of our previous result will be correct, so we
457** can skip a bit of work.
458*/
459
460void RSqrt(Short *r, Short *n, Indexer Len, Indexer SubLen)
461{
462 Indexer x;
463 int iter;
464 double t;
465
466 /* Estimate our first reciprocal square root */
467
468 if (SubLen < 2 )
469 {
470 for (x = 0; x < Len; x++)
471 r[x] = 0;
472 t = n[0] + n[1]*1.0e-4 + n[2]*1.0e-8;
473 if (t == 0.0)
474 t = 1;
475 t = 1.0/sqrt(t);
476 r[0] = t;
477 t = (t - floor(t))*10000.0;
478 r[1] = t;
479 t = (t - floor(t))*10000.0;
480 r[2] = t;
481 }
482
483 /*
484 ** PrintShort2("\n ~R: ", r, SIZE);
485 */
486
487 if (SubLen > SIZE)
488 SubLen = SIZE;
489 iter = SubLen;
490 while (iter <= SIZE*2)
491 {
492 FastMul(MulWork, r, r, SubLen);
493 Round2x(Work1, MulWork, SubLen);
494 FastMul(MulWork, n, Work1, SubLen);
495 Round2x(Work1, MulWork, SubLen);
496
497 MulWork[0] = 3;
498 for (x = 1; x < SubLen; x++)
499 MulWork[x] = 0;
500 Sub(Work1, MulWork, Work1, SubLen);
501
502 FastMul(MulWork, r, Work1, SubLen);
503 Round2x(r, MulWork, SubLen);
504 DivBy2(r, SubLen);
505
506 /*
507 printf("%3d", SubLen);
508 PrintShort2("R: ", r, SubLen);
509 printf("%3d", SubLen);
510 PrintShort2("R: ", r, Len);
511 */
512
513 SubLen *= 2;
514 if (SubLen > SIZE)
515 SubLen = SIZE;
516 iter *= 2;
517 }
518}
519
520/*
521** d = a/b by computing the reciprocal of b and then multiplying
522** that by a. It's faster this way because the normal digit by
523** digit method has N^2 growth, and this method will have the same
524** growth as our multiplication method.
525*/
526
527void Divide(Short *d, Short *a, Short *b, Indexer Len)
528{
529 Reciprocal(Work2, b, Len);
530 FastMul(MulWork, a, Work2, Len);
531 Round2x(d, MulWork, Len);
532}
533
534/*
535** r = sqrt(n) by computing the reciprocal of the square root of
536** n and then multiplying that by n
537*/
538
539void Sqrt(Short *r, Short *n, Indexer Len, Indexer SubLen)
540{
541 RSqrt(Work2, n, Len, SubLen);
542 FastMul(MulWork, n, Work2, Len);
543 Round2x(r, MulWork, Len);
544}
545
546void usage(void)
547{
548 puts("This program requires the number of digits/4 to calculate");
549 puts("This number must be a power of two.");
550 exit(-1);
551}
552
553int main(int argc,char *argv[])
554{
555 Indexer x;
556 Indexer SubLen;
557 double Pow2, tempd, carryd;
558 int AGMIter;
559 int NeededIter;
560 clock_t T0, T1, T2, T3;
561
562 if (argc < 2)
563 usage();
564
565 SIZE = atoi(argv[1]);
566 if (!ispow2(SIZE))
567 usage();
568
569 a = (Short *)malloc(sizeof(Short)*SIZE);
570 b = (Short *)malloc(sizeof(Short)*SIZE);
571 c = (Short *)malloc(sizeof(Short)*SIZE);
572 Work1 = (Short *)malloc(sizeof(Short)*SIZE);
573 Work2 = (Short *)malloc(sizeof(Short)*SIZE);
574 Work3 = (Short *)malloc(sizeof(Short)*SIZE);
575 MulWork = (Short *)malloc(sizeof(Short)*SIZE*4);
576
577 if ((a ==NULL) || (b == NULL) || (c == NULL) || (Work1 == NULL) ||
578 (Work2 == NULL) || (MulWork == NULL))
579 {
580 printf("Unable to allocate memory\n");exit(1);
581 }
582
583 NeededIter = log(SIZE)/log(2.0) + 1;
584 Pow2 = 4.0;
585 AGMIter = NeededIter + 1;
586 SubLen = 1;
587
588 T0 = clock();
589
590 for (x = 0; x < SIZE; x++)
591 a[x] = b[x] = c[x] = Work3[x] = 0;
592 a[0] = 1;
593 Work3[0] = 2;
594 RSqrt(b, Work3, SIZE, 1);
595 c[0] = 1;
596
597 T1 = clock();
598
599 /*
600 printf("AGMIter %d\n", AGMIter);
601 PrintShort2("a AGM: ", a, SIZE);
602 PrintShort2("b AGM: ", b, SIZE);
603 PrintShort2("C sum: ", c, SIZE);
604 */
605
606 while (AGMIter--)
607 {
608 Sub(Work1, a, b, SIZE); /* w = (a-b)/2 */
609 DivBy2(Work1, SIZE);
610 FastMul(MulWork, Work1, Work1, SIZE); /* m = w*w */
611 Round2x(Work1, MulWork, SIZE);
612
613 carryd = 0.0; /* m = m* w^(J+1) */
614 for (x = SIZE - 1; x >= 0; x--)
615 {
616 tempd = Pow2*Work1[x] + carryd;
617 carryd = floor(tempd / 10000.0);
618 Work1[x] = tempd - carryd*10000.0;
619 }
620 Pow2 *= 2.0;
621 Sub(c, c, Work1, SIZE); /* c = c - m */
622
623 /* Save some time on last iter */
624
625 if (AGMIter != 0)
626 FastMul(MulWork, a, b, SIZE); /* m = a*b */
627 Add(a, a, b, SIZE); /* a = (a+b)/2 */
628 DivBy2(a, SIZE);
629 if (AGMIter != 0)
630 {
631 Round2x(Work3, MulWork, SIZE);
632 Sqrt(b, Work3, SIZE, SubLen); /* b = sqrt(a*b) */
633 }
634 /*
635 printf("AGMIter %d\n", AGMIter);
636 PrintShort2("a AGM: ", a, SIZE);
637 PrintShort2("b AGM: ", b, SIZE);
638 PrintShort2("C sum: ", c, SIZE);
639 */
640 SubLen *= 2;if (SubLen > SIZE) SubLen = SIZE;
641 }
642
643 T2 = clock();
644
645 FastMul(MulWork, a, a, SIZE);
646 Round2x(a, MulWork, SIZE);
647 carryd = 0.0;
648 for (x = SIZE - 1; x >= 0; x--)
649 {
650 tempd = 4.0*a[x] + carryd;
651 carryd = floor(tempd / 10000.0);
652 a[x] = tempd - carryd*10000.0;
653 }
654
655 Divide(b, a, c, SIZE);
656 T3 = clock();
657
658 PrintShort("\nPI = ", b, SIZE);
659
660 printf("\nTotal Execution time: %12.4lf\n",
661 (double)(T3 - T0) / CLOCKS_PER_SEC);
662 printf("Setup time : %12.4lf\n",
663 (double)(T1 - T0) / CLOCKS_PER_SEC);
664 printf("AGM Calculation time: %12.4lf\n",
665 (double)(T2 - T1) / CLOCKS_PER_SEC);
666 printf("Post calc time : %12.4lf\n",
667 (double)(T3 - T2) / CLOCKS_PER_SEC);
668
669 return 0;
670}

17 million digits in O(n^1.7), using the Salamin/Brent/Gauss arithmetic/geometric mean pi formula.