円周率の計算 (5)

8 月 13 日版のプログラム・リストを下に掲載します。
(8 月 17 日追記: コメント中の aran255 の公式の 44*acot(2072) の符号が誤っていたのを修正しました。 プログラム本体には誤りはありません。)
pi_atan.h

/**************************************************/
/* pi_atan.h -- PI calculation by arctan formulas */
/* 2011-08-09 : Created by pcm1723                */
/* 2011-08-13 : "Spigot" added, RAM usage reduced */
/**************************************************/

#define Ndigits_Spigot 40

#define Ndigits_Machin 183
#define LEN_Machin      77
#define N1_Machin      255
#define N2_Machin       77

#define Ndigits_Gauss 326
#define LEN_Gauss     137
#define N1_Gauss      255
#define N2_Gauss      185
#define N3_Gauss      137

#define Ndigits_Stoermer 454
#define LEN_Stoermer     191
#define N1_Stoermer      255
#define N2_Stoermer      191
#define N3_Stoermer      159
#define N4_Stoermer      109

#define Ndigits_atan255  611
#define LEN_atan255      255
#define N1_atan255       255
#define N2_atan255       221
#define N3_atan255       191
#define N4_atan255       181
#define N5_atan255       153
#define N6_atan255       151

//#define MP8LEN_MAX 255
#define MP8LEN_MAX LEN_atan255

// multi precision (MP) array of 8-bit numbers
typedef uint8_t mp8_t[MP8LEN_MAX];

#if defined(ADDSUBONLY) // use ADD/SUB only, no shift/rotate

  #define mul_add_u8(prod,cand,lier,carry) \
  __asm__ __volatile__ ( \
/* code                                               */ \
  "  push  %2\t\n"     /* ; save multiplier           */ \
  "  mov   %A0,%3\t\n" /* ; prod_L <- carry byte      */ \
  "  clr   %B0\t\n"    /* ; prod_H <- 0               */ \
  "  tst   %2\t\n"     /* ; test multiplier           */ \
  "  breq  L2_%=\t\n"  /* ; skip loop if (0 == lier)  */ \
  "L1_%=:\t\n"                                           \
  "  add  %A0,%1\t\n"  /* ; add multiplicand          */ \
  "  adc  %B0,r1\t\n"  /* ; carry propagation         */ \
  "  dec  %2\t\n"      /* ; decrement multiplier      */ \
  "  brne L1_%=\t\n"   /* ; loop again if (0 != lier) */ \
  "L2_%=:\t\n"                                           \
  "  pop  %2\t\n"      /* ; retore multiplier         */ \
/* output register     */ \
  : "=&w"(prod)  /* %0 */ \
/* input registers     */ \
  : "r"(cand),   /* %1 */ \
    "a"(lier),   /* %2 */ \
    "r"(carry)   /* %3 */ \
/* crobber registers   */ \
  : /* none            */ \
  ) /*  __asm__ () */
  
#else // use MUL operation

  #define mul_add_u8(prod,cand,lier,carry) \
  __asm__ __volatile__ ( \
/* code                                        */ \
  "mul  %1,%2\t\n"  /* ; R1:R0 = cand * lier   */ \
  "mov  %A0,r0\t\n" /* ; low  byte of product  */ \
  "mov  %B0,r1\t\n" /* ; high byte of product  */ \
  "clr  r1\t\n"     /* ; restore zero register */ \
  "add  %A0,%3\t\n" /* ; add carry-in byte     */ \
  "adc  %B0,r1\t\n" /* ; carry propagation     */ \
/* output register     */ \
  : "=&w"(prod)  /* %0 */ \
/* input registers     */ \
  : "r"(cand),   /* %1 */ \
    "r"(lier),   /* %2 */ \
    "r"(carry)   /* %3 */ \
/* crobber registers   */ \
  : /* none            */ \
  ) /*  __asm__ () */
  
#endif

#if defined(ARDUINO) // use Serial class for output
  #define PRINT_STR(s)   Serial.print(s)
  #define PRINT_DEC(d)   Serial.print(d,DEC)
  #define PRINTLN_STR(s) Serial.println(s)
  #define PRINTLN_DEC(d) Serial.println(d,DEC)
#else // use conventional 'printf'
  #define PRINT_STR(s)   printf(s)
  #define PRINT_DEC(d)   printf("%d",d)
  #define PRINTLN_STR(s) printf("%s\r\xa",s)
  #define PRINTLN_DEC(d) printf("%d\r\xa",d)
#endif

pi_atan.pde

/**************************************************/
/* pi_atan.c -- PI calculation by Machin-like     */
/*              arctan formulas                   */
/*                                                */
/* 2011-08-09 : Created by pcm1723                */
/* 2011-08-13 : "Spigot" added, RAM usage reduced */
/**************************************************/

#include <inttypes.h>

#define ADDSUBONLY 1

#if defined(__AVR__)
  #define ARDUINO 1
#else  
  #include <stdio.h>
#endif

#include "pi_atan.h"

mp8_t a1, a2; // arccot results

uint8_t mp8_len = MP8LEN_MAX;

#if defined(ARDUINO)
uint32_t elapsed;

//   (uint16_t) dividend  / (uint8_t) divisor 
// = (uint8_t)  remainder : (uint8_t) quotient
//
uint16_t div_u16_u8(uint16_t dividend, uint8_t divisor)
{
  register uint8_t  cnt;      // loop counter
  register uint16_t rem8quo8; // result (rem:high byte, quo:low byte)
  
#if defined(ADDSUBONLY)  

  __asm__ __volatile__ ( \
/* code                                                  */ \
  "  mov   r0,%B1\t\n"  /* ; r0  <-- dend_H (%B1)        */ \
  "  mov   %B1,%A1\t\n" /* ; %B1 <-- former dend_L (%A1) */ \
  "  clr   %A1\t\n"     /* ; clear quotient (%A1)        */ \
  "L_1%=:\t\n"                                              \
  "  inc   %A1\t\n"     /* ; increment quotient (%A1)    */ \
  "  sub   %B1,%2\t\n"  /* ; dend_L - divisor            */ \
  "  sbc   r0,r1\t\n"   /* ; dend_H - 0 - borrow         */ \
  "  brcc  L_1%=\t\n"   /* ; if (0 == C), try more       */ \
  "  dec   %A1\t\n"     /* ; adjust quotient             */ \
  "  add   %B1,%2\t\n"  /* ; restore remainder           */ \
/* output register        */ \
  : "=&w"(rem8quo8) /* %0 */ \
/* input registers        */ \
  : "0"(dividend),  /* %1 */ \
    "r"(divisor)    /* %2 */ \
/* clobber                */ \
  :                          \
 ); /* __asm__ () */
 
#else // use ROL operation

  __asm__ __volatile__ ( \
/* code                                                  */ \
  "  ldi   %3,8\t\n"   /* ; set loop count               */ \
  "  rol   %A1\t\n"    /* ; setup                        */ \
  "L_1%=:\t\n"                                              \
  "  rol   %B1\t\n"    /* ; shift left dend_H (%B1)      */ \
  "  brcs  L_2%=\t\n"  /* ; if (1 == C), force subtract  */ \
  "  cp    %B1,%2\t\n" /* ; test for (dividend-divisor)  */ \
  "  brcs  L_3%=\t\n"  /* ; branch if not subtractable   */ \
  "L_2%=:\t\n"                                              \
  "  sub   %B1,%2\t\n" /* ; subtract divisor (%2)        */ \
  "  clc\t\n"          /* ; force carry cleared          */ \
  "L_3%=:\t\n"                                              \
  "  rol   %A1\t\n"    /* ; shift-in quo. bit to LSB and */ \
                       /* ; shift-out dend. MSB to Carry */ \
  "  dec   %3\t\n"     /* ; decrement loop count (%3)    */ \
  "  brne  L_1%=\t\n"  /* ; loop finished ?              */ \
  "  com   %A1\t\n"    /* ; invert quotient (%A1)        */ \
/* output register        */ \
  : "=&w"(rem8quo8) /* %0 */ \
/* input registers        */ \
  : "0"(dividend),  /* %1 */ \
    "r"(divisor),   /* %2 */ \
    "a"(cnt)        /* %3 */ \
/* clobber                */ \
  :                          \
 ); /* __asm__ () */
 
#endif
  return (rem8quo8);
} // uint16_t div_u16_u8()
#endif // #if defined(ARDUINO)

// set multi precision array length 
//
void mp8_setlen(uint8_t len)
{
  mp8_len = len;
#if defined(ARDUINO)  
  elapsed = millis(); // remember milli-sec count at beginning
#endif  
} // void mp8_setlen()

// set a value to integer part of dst[] 
// and clear fraction part of dst[]
//
void mp8_setval(mp8_t   dst,     // MP dest. operand
                uint8_t intpart) // integer value
{
  register uint8_t  i;
  dst[0] = intpart; // integer part
  for (i = 1; i < mp8_len; i++) { 
    dst[i] = 0; // clear fraction part
  } // for (i = ...
} // void mp8_setval()

void mp8_copy(mp8_t dst, mp8_t src)
{
  register uint8_t  i;
  for (i = 0; i < mp8_len; i++) {
    dst[i] = src[i];
  } // for (i = ...
} // void mp8_copy()

// MP addition : dst[] = dst[] + src[]
//
void mp8_add(mp8_t dst, // MP dest.  operand
             mp8_t src) // MP source operand 
{
  register uint8_t  i, carry = 0; // clear carry
  register uint16_t acc;
// scan from LSB to MSB  
  for (i = mp8_len-1; 0xff != i; i--) {
    acc    = (dst[i] + src[i] + carry);
    dst[i] = (0xff & acc);
    carry  = (acc >> 8); // propergate carry
  } // for (i = ...
} // void mp8_add()

// MP subtraction (normal) : dst[] = dst[] - src[]
//
void mp8_sub(mp8_t dst, // MP dest.  operand
             mp8_t src) // MP source operand
{
  register uint8_t  i, carry = 1; // set carry (clear borrow)
  register uint16_t acc;
// scan from LSB to MSB  
  for (i = mp8_len-1; 0xff != i; i--) {
    acc    = (dst[i] + (0xff ^ src[i]) + carry);
    dst[i] = (0xff & acc);
    carry  = (acc >> 8); // propagate carry
  } // for (i = ...
} // void mp8_sub()

// MP subtraction (reverse) : dst[] = src[] - dst[]
//
void mp8_subr(mp8_t dst, // MP dest.  operand 
              mp8_t src) // MP source operand
{
  register uint8_t  i, carry = 1; // set carry (clear borrow)
  register uint16_t acc;
// scan from LSB to MSB  
  for (i = mp8_len-1; 0xff != i; i--) {
    acc    = (src[i] + (0xff ^ dst[i]) + carry);
    dst[i] = (0xff & acc);
    carry  = (acc >> 8); // propagate carry
  } // for (i = ...
} // void mp8_subr()

// MP subtraction from 1.0 : dst[] = 1.0 - dst[]
//
void mp8_sub_from1(mp8_t dst) // MP dest.  operand 
{
  uint8_t  i, carry = 1; // set carry (clear borrow)
  uint16_t acc;
// scan from LSB to MSB  
  for (i = mp8_len-1; 0xff != i; i--) {
    acc    = ((0 == i) + (0xff ^ dst[i]) + carry);
    dst[i] = (0xff & acc);
    carry  = (acc >> 8); // propagate carry
  } // for (i = ...
} // void mp8_from1()

// MP multiplication by single prec. multiplier
// cand[] = cand[] * lier
//
void mp8_mul(mp8_t   cand, // MP multiplicand  
             uint8_t lier) // single prec. multiplier
{
  uint8_t  i, carry = 0; // clear carry
  uint16_t acc;
// scan from LSB to MSB  
  for (i = mp8_len-1; 0xff != i; i--) {
#if defined(ARDUINO)
    mul_add_u8(acc, cand[i], lier, carry);
#else
    acc     = ((uint16_t)cand[i] * lier) + carry;
#endif    
    cand[i] = (0xff & acc);
    carry   = (acc >> 8);
  } // for (i = ...
} // void mp8_mul()

// MP integer division by single prec. divisor
// dend[] = dend[] / divisor
//
void mp8_div(mp8_t   dend,    // multi  prec. dividend
             uint8_t divisor) // single prec. divisor
{
  register uint8_t  i, carry = 0; // clear carry
  register uint16_t acc;
// scan from MSB downto LSB  
  for (i = 0; i < mp8_len; i++) {
    acc     = ((carry << 8) | dend[i]);
#if defined(ARDUINO)
    acc     = div_u16_u8(acc, divisor);
    carry   = (acc >> 8);   // remainder in high byte of acc
    dend[i] = (0xff & acc); // quotient  in low  byte of acc
#else
    carry   = (acc % divisor); // propergate carry
    dend[i] = (acc / divisor);
#endif    
  } // for (i = ...
} // void mp8_div()

// MP integer division by single prec. divisor twice
// dend[] = (dend[] / divisor) / divisor
//
void mp8_div_sq(mp8_t   dend,    // multi  prec. dividend
                uint8_t divisor) // single prec. divisor
{
  mp8_div(dend, divisor); // dend[] = dend[] / divisor
  mp8_div(dend, divisor); // dend[] = dend[] / divisor
} // void mp8_div_sq()

void print_number(mp8_t    c,      // MP number to print
                  mp8_t    ro,     // MP work
                  uint16_t digits) // number of digits
{
  register uint16_t i;
  mp8_copy(ro, c); 
  for (i = 0; digits > i; i++) {
    PRINT_DEC(ro[0]); // decimal digit
    if (0 == (i % 10)) { // insert space
      PRINT_STR(((0 == i) ? "." : " "));
      if (0 == (i % 50)) { 
        PRINTLN_STR("");
      } // if
    } // if (0 == ..
    ro[0] = 0; // clear current digit
    mp8_mul(ro, 10); // next decimal digit
  } // for (i = ...
} // print_number()

// calculate arccot(arg)
//
// if arg is too big to fit in an 8-bit number,
// factorized args must be supplied (3 factors max.)
// (0 == arg_n) means no more factors
// 
// ex.   682 = 22 * 31
//     12943 =  7 * 43 * 43
//
void calc_acot(mp8_t   a,     // MP arctan result
               uint8_t Nterm, // number of terms 
               uint8_t arg1,  // argument
               uint8_t arg2,  // 2nd factor
               uint8_t arg3)  // 3rd factor
{
  uint8_t j;
  
  mp8_setval(a, 1); // sum = 1.0
// compute from the last term to the first term
  for (j = Nterm; j > 1; j -= 2) { // span over odd numbers
  mp8_mul(a, j-2);    // sum = sum * (j-2)
// compute arctan() term
// sum = (sum/arg)/arg
    mp8_div_sq(a, arg1); // sum = (sum / arg1) / arg1
    if (0 != arg2) { // factorized argument
      mp8_div_sq(a, arg2); // sum = (sum / arg2) / arg2
      if (0 != arg3) { // factorized argument
        mp8_div_sq(a, arg3); // sum = (sum / arg3) / arg3
      } // if (0 != arg3) ...
    } // if (0 != arg2) ...
    mp8_div(a, j);    // sum = sum / j
    mp8_sub_from1(a); // sum = 1.0 - sum
  } // for (j = ...
  mp8_div(a, arg1); // result = sum / arg
  if (0 != arg2) { // factorized argument
    mp8_div(a, arg2);
    if (0 != arg3) { // factorized argument
      mp8_div(a, arg3);
    } // if (0 != arg3) ...
  } // if (0 != arg2) ...
} // void calc_acot()  

void print_pi(mp8_t    d, // MP number to print
              mp8_t    w, // MP number work
              uint16_t digits, // number of digits
              char     *title) // title string
{
#if defined(ARDUINO)
  elapsed = millis() - elapsed; // calc elapsed time
#endif
  PRINT_STR  (title);
  PRINT_STR  (" (digits = ");
  PRINT_DEC  (digits-1);
  PRINT_STR  ("), ");
#if defined(ARDUINO)
  PRINT_STR  ("Elapsed time = ");
  PRINT_DEC  (elapsed);
  PRINTLN_STR(" ms");
#endif
  PRINT_STR  ("pi = ");
  print_number(d, w, digits);
  PRINTLN_STR("");
  PRINTLN_STR("");
} // void print_pi()

// Machin's formula
//
// pi/4 = 4 * arccot(5) - arccot(239)
//
void Machin(mp8_t a1, mp8_t a2)  
{
  mp8_setlen(LEN_Machin); // set MP array length
  calc_acot(a1, N1_Machin,   5, 0, 0); // arccot(5)
  calc_acot(a2, N2_Machin, 239, 0, 0); // arccot(239)
  mp8_mul(a1, 4);  // 4 * arccot(5)
  mp8_sub(a1, a2); // 4 * arccot(5) - arccot(239)
  mp8_mul(a1, 4);  // pi = 4 * (pi/4)
  print_pi(a1, a2, Ndigits_Machin, "Machin"); 
} // void Machin()

// Gauss's formula
//
// pi/4 = 12 * arccot(18) + 8 * arccot(57) - 5 * arccot(239)
//
void Gauss(mp8_t a1, mp8_t a2)  
{
  mp8_setlen(LEN_Gauss); // set MP array length
  calc_acot(a1, N1_Gauss,  18, 0, 0); // arccot(18)
  calc_acot(a2, N2_Gauss,  57, 0, 0); // arccot(57)
  mp8_mul(a1, 12); // 12 * arccot(18)
  mp8_mul(a2,  8); //  8 * arccot(57)
  mp8_add(a1, a2); // 12 * arccot(18) + 8 * arccot(57)
  calc_acot(a2, N2_Gauss, 239, 0, 0); // arccot(239)
  mp8_mul(a2,  5); //  5 * arccot(239)
  mp8_sub(a1, a2); // -5 * arccot(239)
  mp8_mul(a1,  4); // pi = 4 * (pi/4)
  print_pi(a1, a2, Ndigits_Gauss, "Gauss"); 
} // void Gauss()

// Stoermer's formula
//
// pi/4 = 44 * arccot(57)  +  7 * arccot(239)
//      - 12 * arccot(682) + 24 * arccot(12943)
//
void Stoermer(mp8_t a1, mp8_t a2)  
{
  mp8_setlen(LEN_Stoermer); // set MP array length
  calc_acot(a1, N1_Stoermer,  57,  0,  0); // arccot(57)
  calc_acot(a2, N2_Stoermer, 239,  0,  0); // arccot(239)
  mp8_mul(a1, 44); // 44 * arccot(57)
  mp8_mul(a2,  7); //  7 * arccot(239)
  mp8_add(a1, a2); // 44*arccot(57) + 7*arccot(239)
  calc_acot(a2, N3_Stoermer,  22, 31,  0); // arccot(682)
  mp8_mul(a2, 12); // 12 * arccot(682)
  mp8_sub(a1, a2); // -12*arccot(682)
  calc_acot(a2, N4_Stoermer,   7, 43, 43); // arccot(12943)
  mp8_mul(a2, 24); // 24 * arccot(12943)
  mp8_add(a1, a2); // +24*arccot(12943)
  mp8_mul(a1,  4); // pi = 4 * (pi/4)
  print_pi(a1, a2, Ndigits_Stoermer, "Stoermer"); 
} // void Stoermer()

//
// pi/4 = 227 * arccot(255)   -  100 * arccot(682)
//      +  44 * arccot(2072)  +   51 * arccot(2943)
//      -  27 * arccot(12943) +   88 * arccot(16432)
//
void atan255(mp8_t a1, mp8_t a2)  
{
  mp8_setlen(LEN_atan255); // set MP array length
  calc_acot(a1, N1_atan255, 255,  0,  0); // arccot(255)
  calc_acot(a2, N2_atan255,  22, 31,  0); // arccot(682)
  mp8_mul(a1,227); // 227 * arccot(255)
  mp8_mul(a2,100); // 100 * arccot(682)
  mp8_sub(a1, a2); // 227*arccot(255) - 100*arccot(682)
  calc_acot(a2, N3_atan255,  56, 37,  0); // arccot(2072)
  mp8_mul(a2, 44); // 44 * arccot(2072)
  mp8_add(a1, a2); //  +44*arccot(2072)
  calc_acot(a2, N4_atan255,  27, 109, 0); // arccot(2943)
  mp8_mul(a2, 51); // 51 * arccot(2943)
  mp8_add(a1, a2); //  +51*arccot(2943)
  calc_acot(a2, N5_atan255,   7, 43, 43); // arccot(12943)
  mp8_mul(a2, 27); // 27 * arccot(12943)
  mp8_sub(a1, a2); //  -27*arccot(12943)
  calc_acot(a2, N6_atan255,  16, 13, 79); // arccot(16432)
  mp8_mul(a2, 88); // 88 * arccot(16432)
  mp8_add(a1, a2); //  +88*arccot(16432)
  mp8_mul(a1,  4); // pi = 4 * (pi/4)
  print_pi(a1, a2, Ndigits_atan255, "atan255"); 
} // void atan255()

void Spigot( mp8_t num ) // MP work 
{
  const    uint8_t  base   = 100; // n-ary number
  const    uint8_t  nterms = 128; // number of terms
  const    uint8_t  n_step = 4;   // calculation interval
  register uint8_t  i;     // loop index
  register uint8_t  n;     // loop index
  register uint16_t temp;  // temporary work / carry
  register uint8_t  out;   // output digits / carry adjust
  register uint8_t  denom; // denominator of series
  
  for( i = 0 ; i < nterms ; i++ ) {
    num[i] = base / 5; // initalize array
  }
  PRINT_STR  ("Spigot (nterms = ");
  PRINT_DEC  (nterms);
  PRINTLN_STR(")");
  PRINT_STR  ("pi = ");
  out = 0;
  for( n = nterms ; n >= (nterms-2*Ndigits_Spigot); n -= n_step ) {
    temp = 0;
    for( i = n - 1 ; i > 0 ; i-- ) {
      denom = 2 * i - 1;
      temp = temp * i + num[i] * base;
      num[i] = temp % denom;
      temp /= denom;
    }
    i = (out + temp / base);
    PRINT_DEC(i / 10);
    if (0 == ((nterms-n) % (n_step * 5))) {
      PRINT_STR((nterms == n) ? ".\r\xa" : " ");
    } // if
    PRINT_DEC(i % 10);
    out = temp % base;
  }
  PRINTLN_STR("");
  PRINTLN_STR("");
} // void Spigot()

void setup_serial( void )
{
#if defined(ARDUINO)
  Serial.begin(38400); // 38.4 kbps
#endif
} // void setup_serial()

void setup( void )
{
  setup_serial();
  Spigot  (a1);
  Machin  (a1, a2); 
  Gauss   (a1, a2);
  Stoermer(a1, a2);
  atan255 (a1, a2);
} // void setup()

void loop( void )
{
} // void loop()

#if !defined(ARDUINO)
int main()
{
  setup();
  return(0);
} // int main()
#endif