License Public Domain
Lines 585
Keywords
math (4) pi (1)
Permissions
Viewable by Everyone
Editable by All Siafoo Users
Hide
Siafoo is here to make coding less frustrating and to save you time. Join Siafoo Now or Learn More

Calculate PI to 17 Million Digits Atom Feed 1

In Brief 17 million digits in O(n^1.7), using the Salamin/Brent/Gauss arithmetic/geometric mean pi formula.
# 's
  1/*
2** This method implements the Salamin / Brent / Gauss Arithmetic-
3** Geometric Mean pi formula.
4**
5** Let A[0] = 1, B[0] = 1/Sqrt(2)
6**
7** Then iterate from 1 to 'n'.
8** A[n] = (A[n-1] + B[n-1])/2
9** B[n] = Sqrt(A[n-1]*B[n-1])
10** C[n]^2 = A[n]^2 - B[n]^2 (or) C[n] = (A[n-1]-B[n-1])/2
11** n
12** PI[n] = 4A[n+1]^2 / (1-(Sum (2^(j+1))*C[j]^2))
13** j = 1
14**
15** There is an actual error calculation, but it comes out to slightly
16** more than double on each iteration. I think it results in about 17
17** million correct digits, instead of 16 million if it actually
18** doubled. PI16 generates 178,000 digits. PI19 to over a million.
19** PI22 is 10 million, and PI26 to 200 million.
20**
21** For what little it's worth, this program is placed into the public
22** domain by its author, Carey Bloodworth, on September 21, 1996.
23**
24** The math routines in this program are not general purpose routines.
25** They have been optimized and written for this specific use. The
26** concepts are of course portable, but not the implementation.
27**
28** The program run time is about O(n^1.7). That's fairly good growth,
29** compared to things such as the classic arctangents. But not as good
30** as it should be, if I used a FFT based multiplication. Also, the 'O'
31** is fairly large. In fact, I'd guess that this program could compute
32** one million digits of pi in about the same time as my previously
33** posted arctangent method, in spite of this one having less than n^2
34** growth.
35**
36** The program has not been cleaned up. It's written rather crudely
37** and dirty. The RSqrt() in particular is rather dirty, having gone
38** through numerous changes and attempts at speeding it up.
39** But I'm not planning on doing any more with it. The growth isn't as
40** low as I'd hoped, and until I find a faster multiplication, the
41** method isn't any better than simpler arctangents.
42**
43** I currently use a base of 10,000 simply because it made debugging
44** easier. A base of 65,536 and modifying the FastMul() to handle sizes
45** that aren't powers of two would allow a bit better efficiency.
46*/
47
48#include <stdio.h>
49#include <stdlib.h>
50#include <math.h>
51#include <time.h>
52#include "snipmath.h"
53
54#ifdef __TURBOC__
55typedef short int Indexer;
56#else
57typedef long int Indexer;
58#endif
59
60typedef short int Short;
61typedef long int Long;
62
63Short *a, *b, *c, *Work1, *MulWork, *Work2, *Work3;
64int SIZE;
65
66/*
67** Work1 is explicitly used in Reciprocal() and RSqrt()
68** Work1 is implicitly used in Divide() and Sqrt()
69** Work2 is explicitly used in Divide() and Sqrt()
70** Work3 is used only in the AGM and holds the previous reciprocal
71** square root, allowing us to save some time in RSqrt()
72*/
73
74
75void Copy(Short *a, Short *b, Indexer Len)
76{
77 while (Len--) a[Len] = b[Len];
78}
79
80/*
81** This rounds and copies a 2x mul result into a normal result
82** Our number format will never have more than one unit of integer,
83** and after a mul, we have two, so we need to fix that.
84*/
85
86void Round2x(Short *a, Short *b, Indexer Len)
87{
88 Indexer x;
89 Short carry;
90
91 carry = 0;
92 if (b[Len+1] >= 5000)
93 carry = 1;
94 for (x = Len; x > 0; x--)
95 {
96 carry += b[x];
97 a[x-1] = carry % 10000;
98 carry /= 10000;
99 }
100}
101
102void DivBy2(Short *n, Indexer Len)
103{
104 Indexer x;
105 Long temp;
106
107 temp = 0;
108 for (x = 0; x < Len; x++)
109 {
110 temp = (Long)n[x]+temp*10000;
111 n[x] = (Short)(temp/2);
112 temp = temp%2;
113 }
114}
115
116void DoCarries(Short *limit, Short *cur, Short carry)
117{
118 Long temp;
119
120 while ((cur >= limit) && (carry != 0))
121 {
122 temp = *cur+carry;
123 carry = 0;
124 if (temp >= 10000)
125 {
126 carry = 1;
127 temp -= 10000;
128 }
129 *cur = temp;
130 --cur;
131 }
132}
133
134void DoBorrows(Short *limit, Short *cur, Short borrow)
135{
136 Long temp;
137 while ((cur >= limit) && (borrow != 0))
138 {
139 temp = *cur-borrow;
140 borrow = 0;
141 if (temp < 0)
142 {
143 borrow = 1;
144 temp += 10000;
145 }
146 *cur = temp;
147 --cur;
148 };
149}
150
151void PrintShort2(char *str, Short *num, Indexer Len)
152{
153 Indexer x;
154
155 printf("%s ", str);
156 printf("%u.", num[0]);
157 for (x = 1; x < Len; x++)
158 printf("%04u", num[x]);
159 printf("\n");
160}
161
162void PrintShort(char *str, Short *num, Indexer Len)
163{
164 Indexer x;
165 int printed = 0;
166
167 printf("%s ", str);
168
169 printf("%u.\n", num[0]);
170
171 for (x = 1; x < Len; x++)
172 {
173 printf("%02d", num[x]/100);
174 printed += 2;
175 if ((printed % 1000) == 0)
176 {
177 printf("\n\n\n\n");
178 printed = 0;
179 }
180 else if ((printed % 50) == 0)
181 {
182 printf("\n");
183 }
184 else if ((printed % 10) == 0)
185 {
186 printf(" ");
187 }
188 printf("%02d", num[x] % 100);
189 printed += 2;
190 if ((printed % 1000) == 0)
191 {
192 printf("\n\n\n\n");
193 printed = 0;
194 }
195 else if ((printed % 50) == 0)
196 {
197 printf("\n");
198 }
199 else if ((printed % 10) == 0)
200 {
201 printf(" ");
202 }
203 }
204 printf("\n");
205
206}
207
208/* sum = a + b */
209
210Short Add(Short *sum, Short *a, Short *b, Indexer Len)
211{
212 Long s, carry;
213 Indexer x;
214
215 carry = 0;
216 for (x = Len - 1; x >= 0; x--)
217 {
218 s = (Long)a[x] + (Long)b[x] + carry;
219 sum[x] = (Short)(s%10000);
220 carry = s/10000;
221 }
222 return carry;
223}
224
225/* dif = a-b */
226
227Short Sub(Short *dif, Short *a, Short *b, Indexer Len)
228{
229 Long d, borrow;
230 Indexer x;
231
232 borrow = 0;
233 for (x = Len - 1; x >= 0; x--)
234 {
235 d = (Long)a[x] - (Long)b[x] - borrow;
236 borrow = 0;
237 if (d < 0)
238 {
239 borrow = 1;
240 d += 10000;
241 }
242 dif[x] = (Short)d;
243 }
244 return borrow;
245}
246
247void Negate(Short *num, Indexer Len)
248{
249 Indexer x;Long d, borrow;
250
251 borrow = 0;
252 for (x = Len - 1; x >= 0; x--)
253 {
254 d = 0 - num[x] - borrow;
255 borrow = 0;
256 if (d < 0)
257 {
258 borrow = 1;
259 d += 10000;
260 }
261 num[x] = (Short)d;
262 }
263}
264
265/* prod = a*b. prod should be twice normal length */
266
267void Mul(Short *prod, Short *a, Short *b, Indexer Len)
268{
269 Long p;
270 Indexer ia, ib, ip;
271
272 if ((prod == a) || (b == prod))
273 {
274 printf("MUL product can't be one of the other arguments\n");
275 exit(1);
276 }
277
278 for (ip = 0; ip < Len * 2; ip++)
279 prod[ip] = 0;
280 for (ib = Len - 1; ib >= 0; ib--)
281 {
282 if (b[ib] == 0)
283 continue;
284 ip = ib + Len;
285 p = 0;
286 for (ia = Len - 1; ia >= 0; ia--)
287 {
288 p = (Long)a[ia]*(Long)b[ib] + p + prod[ip];
289 prod[ip] = p%10000;
290 p = p/10000;
291 ip--;
292 }
293 while ((p) && (ip >= 0))
294 {
295 p += prod[ip];
296 prod[ip] = p%10000;
297 p = p/10000;
298 ip--;
299 }
300 }
301}
302
303/*
304** This is based on the simple O(n^1.585) method, although my
305** growth seems to be closer to O(n^1.7)
306**
307** It's fairly simple. a*b is: a2b2(B^2+B)+(a2-a1)(b1-b2)B+a1b1(B+1)
308**
309** For a = 4711 and b = 6397, a2 = 47 a1 = 11 b2 = 63 b1 = 97 Base = 100
310**
311** If we did that the normal way, we'd do
312** a2b2 = 47*63 = 2961
313** a2b1 = 47*97 = 4559
314** a1b2 = 11*63 = 693
315** a1b1 = 11*97 = 1067
316**
317** 29 61
318** 45 59
319** 6 93
320** 10 67
321** -----------
322** 30 13 62 67
323**
324** Or, we'd need N*N multiplications.
325**
326** With the 'fractal' method, we compute:
327** a2b2 = 47*63 = 2961
328** (a2-b1)(b1-b2) = (47-11)(97-63) = 36*34 = 1224
329** a1b1 = 11*97 = 1067
330**
331** 29 61
332** 29 61
333** 12 24
334** 10 67
335** 10 67
336** -----------
337** 30 13 62 67
338**
339** We need only 3 multiplications, plus a few additions. And of course,
340** additions are a lot simpler and faster than multiplications, so we
341** end up ahead.
342**
343** The way it is written requires that the size to be a power of two.
344** That's why the program itself requires the size to be a power of two.
345** There is no actual requirement in the method, it's just how I did it
346** because I would be working close to powers of two anyway, and it was
347** easier.
348*/
349
350void FastMul(Short *prod, Short *a, Short *b, Indexer Len)
351{
352 Indexer x, HLen;
353 int SignA, SignB;
354 Short *offset;
355 Short *NextProd;
356
357 /*
358 ** This is the threshold where it's faster to do it the ordinary way
359 ** On my computer, it's 16. Yours may be different.
360 */
361
362 if (Len <= 16 )
363 {
364 Mul(prod, a, b, Len);
365 return;
366 }
367
368 HLen = Len/2;
369 NextProd = prod + 2*Len;
370
371 SignA = Sub(prod, a, a + HLen, HLen);
372 if (SignA)
373 {
374 SignA = 1;
375 Negate(prod, HLen);
376 }
377 SignB = Sub(prod + HLen, b + HLen, b, HLen);
378 if (SignB)
379 {
380 SignB = 1;
381 Negate(prod + HLen, HLen);
382 }
383
384 FastMul(NextProd, prod, prod + HLen, HLen);
385 for (x = 0; x < 2 * Len; x++)
386 prod[x] = 0;
387 offset = prod + HLen;
388 if (SignA == SignB)
389 DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
390 else DoBorrows(prod, offset - 1, Sub(offset, offset, NextProd, Len));
391
392 FastMul(NextProd, a, b, HLen);
393 offset = prod + HLen;
394 DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
395 Add(prod, prod, NextProd, Len);
396
397 FastMul(NextProd, a + HLen, b + HLen, HLen);
398 offset = prod + HLen;
399 DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
400 offset = prod + Len;
401 DoCarries(prod, offset - 1, Add(offset, offset, NextProd, Len));
402}
403
404/*
405** Compute the reciprocal of 'a'.
406** x[k+1] = x[k]*(2-a*x[k])
407*/
408
409void Reciprocal(Short *r, Short *n, Indexer Len)
410{
411 Indexer x, SubLen;
412 int iter;
413 double t;
414
415 /* Estimate our first reciprocal */
416
417 for (x = 0; x < Len; x++)
418 r[x] = 0;
419 t = n[0] + n[1]*1.0e-4 + n[2]*1.0e-8;
420 if (t == 0.0)
421 t = 1;
422 t = 1.0/t;
423 r[0] = t;
424 t = (t - floor(t))*10000.0;
425 r[1] = t;
426 t = (t - floor(t))*10000.0;
427 r[2] = t;
428
429 iter = log(SIZE)/log(2.0) + 1;
430 SubLen = 1;
431 while (iter--)
432 {
433 SubLen *= 2;
434 if (SubLen > SIZE)
435 SubLen = SIZE;
436 FastMul(MulWork, n, r, SubLen);
437 Round2x(Work1, MulWork, SubLen);
438
439 MulWork[0] = 2;
440 for (x = 1; x < Len; x++)
441 MulWork[x] = 0;
442 Sub(Work1, MulWork, Work1, SubLen);
443
444 FastMul(MulWork, r, Work1, SubLen);
445 Round2x(r, MulWork, SubLen);
446 }
447}
448
449/*
450** Computes the reciprocal of the square root of 'a'
451** y[k+1] = y[k]*(3-a*y[k]^2)/2
452**
453**
454** We can save quite a bit of time by using part of our previous
455** results! Since the number is converging onto a specific value,
456** at least part of our previous result will be correct, so we
457** can skip a bit of work.
458*/
459
460void RSqrt(Short *r, Short *n, Indexer Len, Indexer SubLen)
461{
462 Indexer x;
463 int iter;
464 double t;
465
466 /* Estimate our first reciprocal square root */
467
468 if (SubLen < 2 )
469 {
470 for (x = 0; x < Len; x++)
471 r[x] = 0;
472 t = n[0] + n[1]*1.0e-4 + n[2]*1.0e-8;
473 if (t == 0.0)
474 t = 1;
475 t = 1.0/sqrt(t);
476 r[0] = t;
477 t = (t - floor(t))*10000.0;
478 r[1] = t;
479 t = (t - floor(t))*10000.0;
480 r[2] = t;
481 }
482
483 /*
484 ** PrintShort2("\n ~R: ", r, SIZE);
485 */
486
487 if (SubLen > SIZE)
488 SubLen = SIZE;
489 iter = SubLen;
490 while (iter <= SIZE*2)
491 {
492 FastMul(MulWork, r, r, SubLen);
493 Round2x(Work1, MulWork, SubLen);
494 FastMul(MulWork, n, Work1, SubLen);
495 Round2x(Work1, MulWork, SubLen);
496
497 MulWork[0] = 3;
498 for (x = 1; x < SubLen; x++)
499 MulWork[x] = 0;
500 Sub(Work1, MulWork, Work1, SubLen);
501
502 FastMul(MulWork, r, Work1, SubLen);
503 Round2x(r, MulWork, SubLen);
504 DivBy2(r, SubLen);
505
506 /*
507 printf("%3d", SubLen);
508 PrintShort2("R: ", r, SubLen);
509 printf("%3d", SubLen);
510 PrintShort2("R: ", r, Len);
511 */
512
513 SubLen *= 2;
514 if (SubLen > SIZE)
515 SubLen = SIZE;
516 iter *= 2;
517 }
518}
519
520/*
521** d = a/b by computing the reciprocal of b and then multiplying
522** that by a. It's faster this way because the normal digit by
523** digit method has N^2 growth, and this method will have the same
524** growth as our multiplication method.
525*/
526
527void Divide(Short *d, Short *a, Short *b, Indexer Len)
528{
529 Reciprocal(Work2, b, Len);
530 FastMul(MulWork, a, Work2, Len);
531 Round2x(d, MulWork, Len);
532}
533
534/*
535** r = sqrt(n) by computing the reciprocal of the square root of
536** n and then multiplying that by n
537*/
538
539void Sqrt(Short *r, Short *n, Indexer Len, Indexer SubLen)
540{
541 RSqrt(Work2, n, Len, SubLen);
542 FastMul(MulWork, n, Work2, Len);
543 Round2x(r, MulWork, Len);
544}
545
546void usage(void)
547{
548 puts("This program requires the number of digits/4 to calculate");
549 puts("This number must be a power of two.");
550 exit(-1);
551}
552
553int main(int argc,char *argv[])
554{
555 Indexer x;
556 Indexer SubLen;
557 double Pow2, tempd, carryd;
558 int AGMIter;
559 int NeededIter;
560 clock_t T0, T1, T2, T3;
561
562 if (argc < 2)
563 usage();
564
565 SIZE = atoi(argv[1]);
566 if (!ispow2(SIZE))
567 usage();
568
569 a = (Short *)malloc(sizeof(Short)*SIZE);
570 b = (Short *)malloc(sizeof(Short)*SIZE);
571 c = (Short *)malloc(sizeof(Short)*SIZE);
572 Work1 = (Short *)malloc(sizeof(Short)*SIZE);
573 Work2 = (Short *)malloc(sizeof(Short)*SIZE);
574 Work3 = (Short *)malloc(sizeof(Short)*SIZE);
575 MulWork = (Short *)malloc(sizeof(Short)*SIZE*4);
576
577 if ((a ==NULL) || (b == NULL) || (c == NULL) || (Work1 == NULL) ||
578 (Work2 == NULL) || (MulWork == NULL))
579 {
580 printf("Unable to allocate memory\n");exit(1);
581 }
582
583 NeededIter = log(SIZE)/log(2.0) + 1;
584 Pow2 = 4.0;
585 AGMIter = NeededIter + 1;
586 SubLen = 1;
587
588 T0 = clock();
589
590 for (x = 0; x < SIZE; x++)
591 a[x] = b[x] = c[x] = Work3[x] = 0;
592 a[0] = 1;
593 Work3[0] = 2;
594 RSqrt(b, Work3, SIZE, 1);
595 c[0] = 1;
596
597 T1 = clock();
598
599 /*
600 printf("AGMIter %d\n", AGMIter);
601 PrintShort2("a AGM: ", a, SIZE);
602 PrintShort2("b AGM: ", b, SIZE);
603 PrintShort2("C sum: ", c, SIZE);
604 */
605
606 while (AGMIter--)
607 {
608 Sub(Work1, a, b, SIZE); /* w = (a-b)/2 */
609 DivBy2(Work1, SIZE);
610 FastMul(MulWork, Work1, Work1, SIZE); /* m = w*w */
611 Round2x(Work1, MulWork, SIZE);
612
613 carryd = 0.0; /* m = m* w^(J+1) */
614 for (x = SIZE - 1; x >= 0; x--)
615 {
616 tempd = Pow2*Work1[x] + carryd;
617 carryd = floor(tempd / 10000.0);
618 Work1[x] = tempd - carryd*10000.0;
619 }
620 Pow2 *= 2.0;
621 Sub(c, c, Work1, SIZE); /* c = c - m */
622
623 /* Save some time on last iter */
624
625 if (AGMIter != 0)
626 FastMul(MulWork, a, b, SIZE); /* m = a*b */
627 Add(a, a, b, SIZE); /* a = (a+b)/2 */
628 DivBy2(a, SIZE);
629 if (AGMIter != 0)
630 {
631 Round2x(Work3, MulWork, SIZE);
632 Sqrt(b, Work3, SIZE, SubLen); /* b = sqrt(a*b) */
633 }
634 /*
635 printf("AGMIter %d\n", AGMIter);
636 PrintShort2("a AGM: ", a, SIZE);
637 PrintShort2("b AGM: ", b, SIZE);
638 PrintShort2("C sum: ", c, SIZE);
639 */
640 SubLen *= 2;if (SubLen > SIZE) SubLen = SIZE;
641 }
642
643 T2 = clock();
644
645 FastMul(MulWork, a, a, SIZE);
646 Round2x(a, MulWork, SIZE);
647 carryd = 0.0;
648 for (x = SIZE - 1; x >= 0; x--)
649 {
650 tempd = 4.0*a[x] + carryd;
651 carryd = floor(tempd / 10000.0);
652 a[x] = tempd - carryd*10000.0;
653 }
654
655 Divide(b, a, c, SIZE);
656 T3 = clock();
657
658 PrintShort("\nPI = ", b, SIZE);
659
660 printf("\nTotal Execution time: %12.4lf\n",
661 (double)(T3 - T0) / CLOCKS_PER_SEC);
662 printf("Setup time : %12.4lf\n",
663 (double)(T1 - T0) / CLOCKS_PER_SEC);
664 printf("AGM Calculation time: %12.4lf\n",
665 (double)(T2 - T1) / CLOCKS_PER_SEC);
666 printf("Post calc time : %12.4lf\n",
667 (double)(T3 - T2) / CLOCKS_PER_SEC);
668
669 return 0;
670}

17 million digits in O(n^1.7), using the Salamin/Brent/Gauss arithmetic/geometric mean pi formula.