円周率の計算 (5)
8 月 13 日版のプログラム・リストを下に掲載します。
(8 月 17 日追記: コメント中の aran255 の公式の 44*acot(2072) の符号が誤っていたのを修正しました。 プログラム本体には誤りはありません。)
pi_atan.h
/**************************************************/ /* pi_atan.h -- PI calculation by arctan formulas */ /* 2011-08-09 : Created by pcm1723 */ /* 2011-08-13 : "Spigot" added, RAM usage reduced */ /**************************************************/ #define Ndigits_Spigot 40 #define Ndigits_Machin 183 #define LEN_Machin 77 #define N1_Machin 255 #define N2_Machin 77 #define Ndigits_Gauss 326 #define LEN_Gauss 137 #define N1_Gauss 255 #define N2_Gauss 185 #define N3_Gauss 137 #define Ndigits_Stoermer 454 #define LEN_Stoermer 191 #define N1_Stoermer 255 #define N2_Stoermer 191 #define N3_Stoermer 159 #define N4_Stoermer 109 #define Ndigits_atan255 611 #define LEN_atan255 255 #define N1_atan255 255 #define N2_atan255 221 #define N3_atan255 191 #define N4_atan255 181 #define N5_atan255 153 #define N6_atan255 151 //#define MP8LEN_MAX 255 #define MP8LEN_MAX LEN_atan255 // multi precision (MP) array of 8-bit numbers typedef uint8_t mp8_t[MP8LEN_MAX]; #if defined(ADDSUBONLY) // use ADD/SUB only, no shift/rotate #define mul_add_u8(prod,cand,lier,carry) \ __asm__ __volatile__ ( \ /* code */ \ " push %2\t\n" /* ; save multiplier */ \ " mov %A0,%3\t\n" /* ; prod_L <- carry byte */ \ " clr %B0\t\n" /* ; prod_H <- 0 */ \ " tst %2\t\n" /* ; test multiplier */ \ " breq L2_%=\t\n" /* ; skip loop if (0 == lier) */ \ "L1_%=:\t\n" \ " add %A0,%1\t\n" /* ; add multiplicand */ \ " adc %B0,r1\t\n" /* ; carry propagation */ \ " dec %2\t\n" /* ; decrement multiplier */ \ " brne L1_%=\t\n" /* ; loop again if (0 != lier) */ \ "L2_%=:\t\n" \ " pop %2\t\n" /* ; retore multiplier */ \ /* output register */ \ : "=&w"(prod) /* %0 */ \ /* input registers */ \ : "r"(cand), /* %1 */ \ "a"(lier), /* %2 */ \ "r"(carry) /* %3 */ \ /* crobber registers */ \ : /* none */ \ ) /* __asm__ () */ #else // use MUL operation #define mul_add_u8(prod,cand,lier,carry) \ __asm__ __volatile__ ( \ /* code */ \ "mul %1,%2\t\n" /* ; R1:R0 = cand * lier */ \ "mov %A0,r0\t\n" /* ; low byte of product */ \ "mov %B0,r1\t\n" /* ; high byte of product */ \ "clr r1\t\n" /* ; restore zero register */ \ "add %A0,%3\t\n" /* ; add carry-in byte */ \ "adc %B0,r1\t\n" /* ; carry propagation */ \ /* output register */ \ : "=&w"(prod) /* %0 */ \ /* input registers */ \ : "r"(cand), /* %1 */ \ "r"(lier), /* %2 */ \ "r"(carry) /* %3 */ \ /* crobber registers */ \ : /* none */ \ ) /* __asm__ () */ #endif #if defined(ARDUINO) // use Serial class for output #define PRINT_STR(s) Serial.print(s) #define PRINT_DEC(d) Serial.print(d,DEC) #define PRINTLN_STR(s) Serial.println(s) #define PRINTLN_DEC(d) Serial.println(d,DEC) #else // use conventional 'printf' #define PRINT_STR(s) printf(s) #define PRINT_DEC(d) printf("%d",d) #define PRINTLN_STR(s) printf("%s\r\xa",s) #define PRINTLN_DEC(d) printf("%d\r\xa",d) #endif
pi_atan.pde
/**************************************************/ /* pi_atan.c -- PI calculation by Machin-like */ /* arctan formulas */ /* */ /* 2011-08-09 : Created by pcm1723 */ /* 2011-08-13 : "Spigot" added, RAM usage reduced */ /**************************************************/ #include <inttypes.h> #define ADDSUBONLY 1 #if defined(__AVR__) #define ARDUINO 1 #else #include <stdio.h> #endif #include "pi_atan.h" mp8_t a1, a2; // arccot results uint8_t mp8_len = MP8LEN_MAX; #if defined(ARDUINO) uint32_t elapsed; // (uint16_t) dividend / (uint8_t) divisor // = (uint8_t) remainder : (uint8_t) quotient // uint16_t div_u16_u8(uint16_t dividend, uint8_t divisor) { register uint8_t cnt; // loop counter register uint16_t rem8quo8; // result (rem:high byte, quo:low byte) #if defined(ADDSUBONLY) __asm__ __volatile__ ( \ /* code */ \ " mov r0,%B1\t\n" /* ; r0 <-- dend_H (%B1) */ \ " mov %B1,%A1\t\n" /* ; %B1 <-- former dend_L (%A1) */ \ " clr %A1\t\n" /* ; clear quotient (%A1) */ \ "L_1%=:\t\n" \ " inc %A1\t\n" /* ; increment quotient (%A1) */ \ " sub %B1,%2\t\n" /* ; dend_L - divisor */ \ " sbc r0,r1\t\n" /* ; dend_H - 0 - borrow */ \ " brcc L_1%=\t\n" /* ; if (0 == C), try more */ \ " dec %A1\t\n" /* ; adjust quotient */ \ " add %B1,%2\t\n" /* ; restore remainder */ \ /* output register */ \ : "=&w"(rem8quo8) /* %0 */ \ /* input registers */ \ : "0"(dividend), /* %1 */ \ "r"(divisor) /* %2 */ \ /* clobber */ \ : \ ); /* __asm__ () */ #else // use ROL operation __asm__ __volatile__ ( \ /* code */ \ " ldi %3,8\t\n" /* ; set loop count */ \ " rol %A1\t\n" /* ; setup */ \ "L_1%=:\t\n" \ " rol %B1\t\n" /* ; shift left dend_H (%B1) */ \ " brcs L_2%=\t\n" /* ; if (1 == C), force subtract */ \ " cp %B1,%2\t\n" /* ; test for (dividend-divisor) */ \ " brcs L_3%=\t\n" /* ; branch if not subtractable */ \ "L_2%=:\t\n" \ " sub %B1,%2\t\n" /* ; subtract divisor (%2) */ \ " clc\t\n" /* ; force carry cleared */ \ "L_3%=:\t\n" \ " rol %A1\t\n" /* ; shift-in quo. bit to LSB and */ \ /* ; shift-out dend. MSB to Carry */ \ " dec %3\t\n" /* ; decrement loop count (%3) */ \ " brne L_1%=\t\n" /* ; loop finished ? */ \ " com %A1\t\n" /* ; invert quotient (%A1) */ \ /* output register */ \ : "=&w"(rem8quo8) /* %0 */ \ /* input registers */ \ : "0"(dividend), /* %1 */ \ "r"(divisor), /* %2 */ \ "a"(cnt) /* %3 */ \ /* clobber */ \ : \ ); /* __asm__ () */ #endif return (rem8quo8); } // uint16_t div_u16_u8() #endif // #if defined(ARDUINO) // set multi precision array length // void mp8_setlen(uint8_t len) { mp8_len = len; #if defined(ARDUINO) elapsed = millis(); // remember milli-sec count at beginning #endif } // void mp8_setlen() // set a value to integer part of dst[] // and clear fraction part of dst[] // void mp8_setval(mp8_t dst, // MP dest. operand uint8_t intpart) // integer value { register uint8_t i; dst[0] = intpart; // integer part for (i = 1; i < mp8_len; i++) { dst[i] = 0; // clear fraction part } // for (i = ... } // void mp8_setval() void mp8_copy(mp8_t dst, mp8_t src) { register uint8_t i; for (i = 0; i < mp8_len; i++) { dst[i] = src[i]; } // for (i = ... } // void mp8_copy() // MP addition : dst[] = dst[] + src[] // void mp8_add(mp8_t dst, // MP dest. operand mp8_t src) // MP source operand { register uint8_t i, carry = 0; // clear carry register uint16_t acc; // scan from LSB to MSB for (i = mp8_len-1; 0xff != i; i--) { acc = (dst[i] + src[i] + carry); dst[i] = (0xff & acc); carry = (acc >> 8); // propergate carry } // for (i = ... } // void mp8_add() // MP subtraction (normal) : dst[] = dst[] - src[] // void mp8_sub(mp8_t dst, // MP dest. operand mp8_t src) // MP source operand { register uint8_t i, carry = 1; // set carry (clear borrow) register uint16_t acc; // scan from LSB to MSB for (i = mp8_len-1; 0xff != i; i--) { acc = (dst[i] + (0xff ^ src[i]) + carry); dst[i] = (0xff & acc); carry = (acc >> 8); // propagate carry } // for (i = ... } // void mp8_sub() // MP subtraction (reverse) : dst[] = src[] - dst[] // void mp8_subr(mp8_t dst, // MP dest. operand mp8_t src) // MP source operand { register uint8_t i, carry = 1; // set carry (clear borrow) register uint16_t acc; // scan from LSB to MSB for (i = mp8_len-1; 0xff != i; i--) { acc = (src[i] + (0xff ^ dst[i]) + carry); dst[i] = (0xff & acc); carry = (acc >> 8); // propagate carry } // for (i = ... } // void mp8_subr() // MP subtraction from 1.0 : dst[] = 1.0 - dst[] // void mp8_sub_from1(mp8_t dst) // MP dest. operand { uint8_t i, carry = 1; // set carry (clear borrow) uint16_t acc; // scan from LSB to MSB for (i = mp8_len-1; 0xff != i; i--) { acc = ((0 == i) + (0xff ^ dst[i]) + carry); dst[i] = (0xff & acc); carry = (acc >> 8); // propagate carry } // for (i = ... } // void mp8_from1() // MP multiplication by single prec. multiplier // cand[] = cand[] * lier // void mp8_mul(mp8_t cand, // MP multiplicand uint8_t lier) // single prec. multiplier { uint8_t i, carry = 0; // clear carry uint16_t acc; // scan from LSB to MSB for (i = mp8_len-1; 0xff != i; i--) { #if defined(ARDUINO) mul_add_u8(acc, cand[i], lier, carry); #else acc = ((uint16_t)cand[i] * lier) + carry; #endif cand[i] = (0xff & acc); carry = (acc >> 8); } // for (i = ... } // void mp8_mul() // MP integer division by single prec. divisor // dend[] = dend[] / divisor // void mp8_div(mp8_t dend, // multi prec. dividend uint8_t divisor) // single prec. divisor { register uint8_t i, carry = 0; // clear carry register uint16_t acc; // scan from MSB downto LSB for (i = 0; i < mp8_len; i++) { acc = ((carry << 8) | dend[i]); #if defined(ARDUINO) acc = div_u16_u8(acc, divisor); carry = (acc >> 8); // remainder in high byte of acc dend[i] = (0xff & acc); // quotient in low byte of acc #else carry = (acc % divisor); // propergate carry dend[i] = (acc / divisor); #endif } // for (i = ... } // void mp8_div() // MP integer division by single prec. divisor twice // dend[] = (dend[] / divisor) / divisor // void mp8_div_sq(mp8_t dend, // multi prec. dividend uint8_t divisor) // single prec. divisor { mp8_div(dend, divisor); // dend[] = dend[] / divisor mp8_div(dend, divisor); // dend[] = dend[] / divisor } // void mp8_div_sq() void print_number(mp8_t c, // MP number to print mp8_t ro, // MP work uint16_t digits) // number of digits { register uint16_t i; mp8_copy(ro, c); for (i = 0; digits > i; i++) { PRINT_DEC(ro[0]); // decimal digit if (0 == (i % 10)) { // insert space PRINT_STR(((0 == i) ? "." : " ")); if (0 == (i % 50)) { PRINTLN_STR(""); } // if } // if (0 == .. ro[0] = 0; // clear current digit mp8_mul(ro, 10); // next decimal digit } // for (i = ... } // print_number() // calculate arccot(arg) // // if arg is too big to fit in an 8-bit number, // factorized args must be supplied (3 factors max.) // (0 == arg_n) means no more factors // // ex. 682 = 22 * 31 // 12943 = 7 * 43 * 43 // void calc_acot(mp8_t a, // MP arctan result uint8_t Nterm, // number of terms uint8_t arg1, // argument uint8_t arg2, // 2nd factor uint8_t arg3) // 3rd factor { uint8_t j; mp8_setval(a, 1); // sum = 1.0 // compute from the last term to the first term for (j = Nterm; j > 1; j -= 2) { // span over odd numbers mp8_mul(a, j-2); // sum = sum * (j-2) // compute arctan() term // sum = (sum/arg)/arg mp8_div_sq(a, arg1); // sum = (sum / arg1) / arg1 if (0 != arg2) { // factorized argument mp8_div_sq(a, arg2); // sum = (sum / arg2) / arg2 if (0 != arg3) { // factorized argument mp8_div_sq(a, arg3); // sum = (sum / arg3) / arg3 } // if (0 != arg3) ... } // if (0 != arg2) ... mp8_div(a, j); // sum = sum / j mp8_sub_from1(a); // sum = 1.0 - sum } // for (j = ... mp8_div(a, arg1); // result = sum / arg if (0 != arg2) { // factorized argument mp8_div(a, arg2); if (0 != arg3) { // factorized argument mp8_div(a, arg3); } // if (0 != arg3) ... } // if (0 != arg2) ... } // void calc_acot() void print_pi(mp8_t d, // MP number to print mp8_t w, // MP number work uint16_t digits, // number of digits char *title) // title string { #if defined(ARDUINO) elapsed = millis() - elapsed; // calc elapsed time #endif PRINT_STR (title); PRINT_STR (" (digits = "); PRINT_DEC (digits-1); PRINT_STR ("), "); #if defined(ARDUINO) PRINT_STR ("Elapsed time = "); PRINT_DEC (elapsed); PRINTLN_STR(" ms"); #endif PRINT_STR ("pi = "); print_number(d, w, digits); PRINTLN_STR(""); PRINTLN_STR(""); } // void print_pi() // Machin's formula // // pi/4 = 4 * arccot(5) - arccot(239) // void Machin(mp8_t a1, mp8_t a2) { mp8_setlen(LEN_Machin); // set MP array length calc_acot(a1, N1_Machin, 5, 0, 0); // arccot(5) calc_acot(a2, N2_Machin, 239, 0, 0); // arccot(239) mp8_mul(a1, 4); // 4 * arccot(5) mp8_sub(a1, a2); // 4 * arccot(5) - arccot(239) mp8_mul(a1, 4); // pi = 4 * (pi/4) print_pi(a1, a2, Ndigits_Machin, "Machin"); } // void Machin() // Gauss's formula // // pi/4 = 12 * arccot(18) + 8 * arccot(57) - 5 * arccot(239) // void Gauss(mp8_t a1, mp8_t a2) { mp8_setlen(LEN_Gauss); // set MP array length calc_acot(a1, N1_Gauss, 18, 0, 0); // arccot(18) calc_acot(a2, N2_Gauss, 57, 0, 0); // arccot(57) mp8_mul(a1, 12); // 12 * arccot(18) mp8_mul(a2, 8); // 8 * arccot(57) mp8_add(a1, a2); // 12 * arccot(18) + 8 * arccot(57) calc_acot(a2, N2_Gauss, 239, 0, 0); // arccot(239) mp8_mul(a2, 5); // 5 * arccot(239) mp8_sub(a1, a2); // -5 * arccot(239) mp8_mul(a1, 4); // pi = 4 * (pi/4) print_pi(a1, a2, Ndigits_Gauss, "Gauss"); } // void Gauss() // Stoermer's formula // // pi/4 = 44 * arccot(57) + 7 * arccot(239) // - 12 * arccot(682) + 24 * arccot(12943) // void Stoermer(mp8_t a1, mp8_t a2) { mp8_setlen(LEN_Stoermer); // set MP array length calc_acot(a1, N1_Stoermer, 57, 0, 0); // arccot(57) calc_acot(a2, N2_Stoermer, 239, 0, 0); // arccot(239) mp8_mul(a1, 44); // 44 * arccot(57) mp8_mul(a2, 7); // 7 * arccot(239) mp8_add(a1, a2); // 44*arccot(57) + 7*arccot(239) calc_acot(a2, N3_Stoermer, 22, 31, 0); // arccot(682) mp8_mul(a2, 12); // 12 * arccot(682) mp8_sub(a1, a2); // -12*arccot(682) calc_acot(a2, N4_Stoermer, 7, 43, 43); // arccot(12943) mp8_mul(a2, 24); // 24 * arccot(12943) mp8_add(a1, a2); // +24*arccot(12943) mp8_mul(a1, 4); // pi = 4 * (pi/4) print_pi(a1, a2, Ndigits_Stoermer, "Stoermer"); } // void Stoermer() // // pi/4 = 227 * arccot(255) - 100 * arccot(682) // + 44 * arccot(2072) + 51 * arccot(2943) // - 27 * arccot(12943) + 88 * arccot(16432) // void atan255(mp8_t a1, mp8_t a2) { mp8_setlen(LEN_atan255); // set MP array length calc_acot(a1, N1_atan255, 255, 0, 0); // arccot(255) calc_acot(a2, N2_atan255, 22, 31, 0); // arccot(682) mp8_mul(a1,227); // 227 * arccot(255) mp8_mul(a2,100); // 100 * arccot(682) mp8_sub(a1, a2); // 227*arccot(255) - 100*arccot(682) calc_acot(a2, N3_atan255, 56, 37, 0); // arccot(2072) mp8_mul(a2, 44); // 44 * arccot(2072) mp8_add(a1, a2); // +44*arccot(2072) calc_acot(a2, N4_atan255, 27, 109, 0); // arccot(2943) mp8_mul(a2, 51); // 51 * arccot(2943) mp8_add(a1, a2); // +51*arccot(2943) calc_acot(a2, N5_atan255, 7, 43, 43); // arccot(12943) mp8_mul(a2, 27); // 27 * arccot(12943) mp8_sub(a1, a2); // -27*arccot(12943) calc_acot(a2, N6_atan255, 16, 13, 79); // arccot(16432) mp8_mul(a2, 88); // 88 * arccot(16432) mp8_add(a1, a2); // +88*arccot(16432) mp8_mul(a1, 4); // pi = 4 * (pi/4) print_pi(a1, a2, Ndigits_atan255, "atan255"); } // void atan255() void Spigot( mp8_t num ) // MP work { const uint8_t base = 100; // n-ary number const uint8_t nterms = 128; // number of terms const uint8_t n_step = 4; // calculation interval register uint8_t i; // loop index register uint8_t n; // loop index register uint16_t temp; // temporary work / carry register uint8_t out; // output digits / carry adjust register uint8_t denom; // denominator of series for( i = 0 ; i < nterms ; i++ ) { num[i] = base / 5; // initalize array } PRINT_STR ("Spigot (nterms = "); PRINT_DEC (nterms); PRINTLN_STR(")"); PRINT_STR ("pi = "); out = 0; for( n = nterms ; n >= (nterms-2*Ndigits_Spigot); n -= n_step ) { temp = 0; for( i = n - 1 ; i > 0 ; i-- ) { denom = 2 * i - 1; temp = temp * i + num[i] * base; num[i] = temp % denom; temp /= denom; } i = (out + temp / base); PRINT_DEC(i / 10); if (0 == ((nterms-n) % (n_step * 5))) { PRINT_STR((nterms == n) ? ".\r\xa" : " "); } // if PRINT_DEC(i % 10); out = temp % base; } PRINTLN_STR(""); PRINTLN_STR(""); } // void Spigot() void setup_serial( void ) { #if defined(ARDUINO) Serial.begin(38400); // 38.4 kbps #endif } // void setup_serial() void setup( void ) { setup_serial(); Spigot (a1); Machin (a1, a2); Gauss (a1, a2); Stoermer(a1, a2); atan255 (a1, a2); } // void setup() void loop( void ) { } // void loop() #if !defined(ARDUINO) int main() { setup(); return(0); } // int main() #endif