新版FM音源プログラム (27)
gcc の「インライン・アセンブラ」で書いた ARMv7-M (Cortex-M3) 版の acc_calc_slot() 関数のリストを下に示します。 「中身」は armcc 版と同等です。
#include "slot.h"

//
// gcc inline assembly language function
//
// acc_calc_slot() - process FM operator slots two at a time (a modulator
// slot followed by a carrier slot) and accumulate the stereo output.
//
//   o_p      : pointer to the first op_prop_t of the slot array.
//   num_slot : number of slots to process.  The loop body handles two
//              slots per pass and is entered before the counter is tested,
//              so at least one pair is always processed; it repeats while
//              (counter - 2) > 0.
//
// Returns a 32-bit word holding two signed 16-bit saturated samples:
//   bits [31:16] = left  accumulator  (asr LR_VOL_SHIFT, ssat #16)
//   bits [15: 0] = right accumulator  (asr LR_VOL_SHIFT, ssat #16)
//
// Side effects: ph_acc, op_out and mod_in of the processed slots are
// updated in memory (hence the "memory" clobber).
//
// The asm template addresses the slot pointer and the loop counter by
// their literal register names (r0, r1).  A plain "+r" constraint lets the
// compiler pick ANY register, so the operands are bound to r0/r1 through
// local register variables; without this the code only works when the
// register allocator happens to leave the arguments where the AAPCS put
// them (it breaks e.g. at -O0 or after inlining).
//
int32_t acc_calc_slot(op_prop_t *o_p, int num_slot)
{
    // Pin the in/out operands to the registers hard-coded in the template.
    register op_prop_t *slot_r0 __asm__("r0") = o_p;      // in: slot ptr, out: packed result
    register int        cnt_r1  __asm__("r1") = num_slot; // in: slot count, out: scratch

    __asm__ __volatile__ (
        // Use UAL (Unified Assembler Language) syntax
        " .syntax unified \n\t"
        " movs r8,#0 \n\t"                          // r8 = acc_R = 0
        " mov r9,r8 \n\t"                           // r9 = acc_L = 0
        "1: \n\t"
        //
        // series FM modulator slot (slot at r0)
        //
        " ldr r2,[r0,%[ph_inc]] \n\t"               // r2 = ph_inc
        " ldr r3,[r0,%[ph_acc]] \n\t"               // r3 = ph_acc
        " ldr r4,[r0,%[mod_in]] \n\t"               // r4 = mod_in
        " ldr r5,[r0,%[stab_p]] \n\t"               // r5 = stab_p (table base)
        " adds r2, r2,r3 \n\t"                      // r2 = ph_inc + ph_acc
        " ldrh r3,[r0,%[ind_mask]] \n\t"            // r3 = ind_mask
        " str r2,[r0,%[ph_acc]] \n\t"               // ph_acc = r2
        " adds r4, r4,r2 \n\t"                      // r4 = new ph_acc + mod_in
        " ands r3, r3,r4,lsr %[acc_sft] \n\t"       // r3 = ind_mask & (r4 >> acc_sft): byte offset into table
        " ldr r2,[r0,%[ol_lin]] \n\t"               // r2 = ol_lin
        " ldrsh r3,[r5,r3] \n\t"                    // r3 = *(int16_t *)(stab_p + r3)
        //
        // 2-tap FIR filter for feedback
        //
        " ldrsh r5,[r0,%[op_out]] \n\t"             // r5 = previous op_out
        " muls r2, r3,r2 \n\t"                      // r2 = table value * ol_lin
        " asrs r4, r2,#15 \n\t"                     // r4 = r2 >> 15 (modulator output)
        " ldrh r3,[r0,%[mod_mul]] \n\t"             // r3 = mod_mul
        " strh r4,[r0,%[op_out]] \n\t"              // op_out = new output
        " adds r5, r5,r4 \n\t"                      // r5 = new output + previous output
        " muls r3, r5,r3 \n\t"                      // r3 = r5 * mod_mul
        " str r3,[r0,%[mod_in]] \n\t"               // mod_in = r3 (used on the next call)
        //
        // series FM carrier slot (slot at r0 + sizeof(op_prop_t))
        //
        " ldr r2,[r0,%[ph_inc]+%c[S]] \n\t"         // r2 = ph_inc
        " ldr r3,[r0,%[ph_acc]+%c[S]] \n\t"         // r3 = ph_acc
        " ldrh r5,[r0,%[mod_mul]+%c[S]] \n\t"       // r5 = mod_mul
        " adds r2, r2,r3 \n\t"                      // r2 = ph_inc + ph_acc
        " str r2,[r0,%[ph_acc]+%c[S]] \n\t"         // ph_acc = r2
        " mla r2, r4, r5, r2 \n\t"                  // r2 += modulator output * mod_mul
        " ldrh r3,[r0,%[ind_mask]+%c[S]] \n\t"      // r3 = ind_mask
        " ldr r5,[r0,%[stab_p]+%c[S]] \n\t"         // r5 = stab_p (table base)
        " and r3, r3,r2,lsr %[acc_sft] \n\t"        // r3 = ind_mask & (r2 >> acc_sft)
        " ldr r2,[r0,%[ol_lin]+%c[S]] \n\t"         // r2 = ol_lin
        " ldrsh r3,[r5,r3] \n\t"                    // r3 = *(int16_t *)(stab_p + r3)
        " muls r2, r3,r2 \n\t"                      // r2 = table value * ol_lin
        " asrs r2, r2,#15 \n\t"                     // r2 >>= 15 (carrier output)
        //
        // per-output volume words, all read from the carrier slot
        //
        " ldrh r6,[r0,%[L_vol]+%c[S]] \n\t"         // r6 = L volume applied to carrier output
        " ldrh r7,[r0,%[L_vol]+2+%c[S]] \n\t"       // r7 = L volume applied to modulator output
        " ldrh r3,[r0,%[R_vol]+%c[S]] \n\t"         // r3 = R volume applied to carrier output
        " ldrh r5,[r0,%[R_vol]+2+%c[S]] \n\t"       // r5 = R volume applied to modulator output
        " strh r2,[r0,%[op_out]+%c[S]] \n\t"        // op_out = carrier output
        " mla r8, r2, r3, r8 \n\t"                  // acc_R += carrier   * r3
        " mla r8, r4, r5, r8 \n\t"                  // acc_R += modulator * r5
        " mla r9, r2, r6, r9 \n\t"                  // acc_L += carrier   * r6
        " mla r9, r4, r7, r9 \n\t"                  // acc_L += modulator * r7
        " adds r0, r0,%[S]*2 \n\t"                  // advance to next slot pair
        " subs r1, r1,#2 \n\t"                      // two slots consumed
        " bgt 1b \n\t"                              // loop while counter > 0
        // post scaling and saturate to 16 bit
        " ssat r1, #16, r9, asr %[vol_sft] \n\t"    // r1 = sat16(acc_L >> vol_sft)
        " ssat r0, #16, r8, asr %[vol_sft] \n\t"    // r0 = sat16(acc_R >> vol_sft)
        // pack two halfwords (16 bit, 16 bit) to single word (32 bit)
        " bfi r0, r1,#16,#16 \n\t"                  // r0[31:16] = r1[15:0]
        // output reg list (both are read-write and bound to fixed registers)
        : "+r" (slot_r0),       // r0 : slot pointer in, packed L/R result out
          "+r" (cnt_r1)         // r1 : slot counter in, scratch out
        // input parameter list ("I" for offset constant)
        : [acc_sft]  "I" (PH_ACC_FRAC_BITS-1),
          [vol_sft]  "I" (LR_VOL_SHIFT),
          [ph_inc]   "I" (DEF_OFS(ph_inc)),
          [ph_acc]   "I" (DEF_OFS(ph_acc)),
          [mod_in]   "I" (DEF_OFS(mod_in)),
          [stab_p]   "I" (DEF_OFS(stab_p)),
          [ind_mask] "I" (DEF_OFS(ind_mask)),
          [op_out]   "I" (DEF_OFS(op_out)),
          [ol_lin]   "I" (DEF_OFS(ol_lin)),
          [mod_mul]  "I" (DEF_OFS(mod_mul)),
          [L_vol]    "I" (DEF_OFS(L_vol)),
          [R_vol]    "I" (DEF_OFS(R_vol)),
          [S]        "I" (sizeof(op_prop_t))
        // clobber reg list
        : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
    ); // __asm__

    // r0 no longer holds a pointer: it carries the packed L/R sample pair.
    return((int32_t) slot_r0);
} // int32_t acc_calc_slot()
PSoC5LP Prototyping Kit (CY8CKIT-059) での所要サイクル数の測定結果は下のようになっています。
// STAB in SRAM
//   PSoC Creator 4.0 SP1 (gcc 4.9.3), NSLOT=128
//   60 cycle / 2slot (CY8C5888 (CM3) @ 16 MHz, flash wait=0)
//   60 cycle / 2slot (CY8C5888 (CM3) @ 80 MHz, flash wait=4)
// STAB in flash
//   PSoC Creator 4.0 SP1 (gcc 4.9.3), NSLOT=128
//   62 cycle / 2slot (CY8C5888 (CM3) @ 16 MHz, flash wait=0)
//   72 cycle / 2slot (CY8C5888 (CM3) @ 80 MHz, flash wait=4)