47 #include "NE10_types.h"
48 #include "NE10_macros.h"
53 ne10_int32_t * factors,
57 ne10_int32_t fstride, mstride, N;
58 ne10_int32_t fstride1;
59 ne10_int32_t f_count, m_count;
60 ne10_int32_t stage_count;
71 const ne10_float32_t TW_81 = 0.70710678;
72 const ne10_float32_t TW_81N = -0.70710678;
75 stage_count = factors[0];
77 mstride = factors[ (stage_count << 1) - 1 ];
78 N = factors[ stage_count << 1 ];
88 fstride1 = fstride >> 2;
91 for (f_count = 0; f_count < fstride1; f_count ++)
93 Fout1 = & Fout[ f_count * 8 ];
95 scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
96 scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
97 scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
98 scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
99 scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
100 scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
101 scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
102 scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
103 scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
104 scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
105 scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
106 scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
107 scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
108 scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
109 scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
110 scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
113 scratch[0] = scratch_in[0];
114 scratch[1] = scratch_in[1];
116 scratch[2] = scratch_in[2];
117 scratch[3].r = (scratch_in[3].r + scratch_in[3].i) * TW_81;
118 scratch[3].i = (scratch_in[3].i - scratch_in[3].r) * TW_81;
120 scratch[4] = scratch_in[4];
121 scratch[5].r = scratch_in[5].i;
122 scratch[5].i = -scratch_in[5].r;
124 scratch[6].r = scratch_in[6].r;
125 scratch[6].i = scratch_in[6].i;
126 scratch[7].r = (scratch_in[7].r - scratch_in[7].i) * TW_81N;
127 scratch[7].i = (scratch_in[7].i + scratch_in[7].r) * TW_81N;
130 scratch[8].r = scratch[0].r + scratch[4].r;
131 scratch[8].i = scratch[0].i + scratch[4].i;
132 scratch[9].r = scratch[1].r + scratch[5].r;
133 scratch[9].i = scratch[1].i + scratch[5].i;
135 scratch[10].r = scratch[0].r - scratch[4].r;
136 scratch[10].i = scratch[0].i - scratch[4].i;
137 scratch[11].r = scratch[1].r - scratch[5].r;
138 scratch[11].i = scratch[1].i - scratch[5].i;
141 scratch[12].r = scratch[2].r + scratch[6].r;
142 scratch[12].i = scratch[2].i + scratch[6].i;
143 scratch[13].r = scratch[3].r + scratch[7].r;
144 scratch[13].i = scratch[3].i + scratch[7].i;
146 scratch[14].r = scratch[2].r - scratch[6].r;
147 scratch[14].i = scratch[2].i - scratch[6].i;
148 scratch[15].r = scratch[3].r - scratch[7].r;
149 scratch[15].i = scratch[3].i - scratch[7].i;
152 scratch_out[4].r = scratch[8].r - scratch[12].r;
153 scratch_out[4].i = scratch[8].i - scratch[12].i;
154 scratch_out[5].r = scratch[9].r - scratch[13].r;
155 scratch_out[5].i = scratch[9].i - scratch[13].i;
158 scratch_out[0].r = scratch[8].r + scratch[12].r;
159 scratch_out[0].i = scratch[8].i + scratch[12].i;
160 scratch_out[1].r = scratch[9].r + scratch[13].r;
161 scratch_out[1].i = scratch[9].i + scratch[13].i;
164 scratch_out[2].r = scratch[10].r + scratch[14].i;
165 scratch_out[2].i = scratch[10].i - scratch[14].r;
166 scratch_out[3].r = scratch[11].r + scratch[15].i;
167 scratch_out[3].i = scratch[11].i - scratch[15].r;
170 scratch_out[6].r = scratch[10].r - scratch[14].i;
171 scratch_out[6].i = scratch[10].i + scratch[14].r;
172 scratch_out[7].r = scratch[11].r - scratch[15].i;
173 scratch_out[7].i = scratch[11].i + scratch[15].r;
176 Fout1[0] = scratch_out[0];
177 Fout1[1] = scratch_out[1];
178 Fout1[2] = scratch_out[2];
179 Fout1[3] = scratch_out[3];
180 Fout1[4] = scratch_out[4];
181 Fout1[5] = scratch_out[5];
182 Fout1[6] = scratch_out[6];
183 Fout1[7] = scratch_out[7];
200 for (f_count = fstride; f_count ; f_count --)
203 scratch_in[0] = *Fin1;
204 Fin2 = Fin1 + fstride;
205 scratch_in[1] = *Fin2;
206 Fin2 = Fin2 + fstride;
207 scratch_in[2] = *Fin2;
208 Fin2 = Fin2 + fstride;
209 scratch_in[3] = *Fin2;
214 scratch[0].r = scratch_in[0].r + scratch_in[2].r;
215 scratch[0].i = scratch_in[0].i + scratch_in[2].i;
217 scratch[1].r = scratch_in[0].r - scratch_in[2].r;
218 scratch[1].i = scratch_in[0].i - scratch_in[2].i;
221 scratch[2].r = scratch_in[1].r + scratch_in[3].r;
222 scratch[2].i = scratch_in[1].i + scratch_in[3].i;
224 scratch[3].r = scratch_in[1].r - scratch_in[3].r;
225 scratch[3].i = scratch_in[1].i - scratch_in[3].i;
228 scratch_out[2].r = scratch[0].r - scratch[2].r;
229 scratch_out[2].i = scratch[0].i - scratch[2].i;
232 scratch_out[0].r = scratch[0].r + scratch[2].r;
233 scratch_out[0].i = scratch[0].i + scratch[2].i;
236 scratch_out[1].r = scratch[1].r + scratch[3].i;
237 scratch_out[1].i = scratch[1].i - scratch[3].r;
240 scratch_out[3].r = scratch[1].r - scratch[3].i;
241 scratch_out[3].i = scratch[1].i + scratch[3].r;
244 * Fout1 ++ = scratch_out[0];
245 * Fout1 ++ = scratch_out[1];
246 * Fout1 ++ = scratch_out[2];
247 * Fout1 ++ = scratch_out[3];
267 for (; stage_count > 1 ; stage_count--)
270 for (f_count = 0; f_count < fstride; f_count ++)
272 Fout1 = & Fout[ f_count * mstride << 2 ];
274 for (m_count = mstride; m_count ; m_count --)
277 scratch_tw[0] = *tw1;
279 scratch_tw[1] = *tw2;
281 scratch_tw[2] = *tw2;
282 scratch_in[0] = * Fin1;
284 scratch_in[1] = * Fin2;
286 scratch_in[2] = * Fin2;
288 scratch_in[3] = * Fin2;
292 scratch[0] = scratch_in[0];
293 scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
294 scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
296 scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
297 scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
299 scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
300 scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
303 scratch[4].r = scratch[0].r + scratch[2].r;
304 scratch[4].i = scratch[0].i + scratch[2].i;
306 scratch[5].r = scratch[0].r - scratch[2].r;
307 scratch[5].i = scratch[0].i - scratch[2].i;
310 scratch[6].r = scratch[1].r + scratch[3].r;
311 scratch[6].i = scratch[1].i + scratch[3].i;
313 scratch[7].r = scratch[1].r - scratch[3].r;
314 scratch[7].i = scratch[1].i - scratch[3].i;
317 scratch_out[2].r = scratch[4].r - scratch[6].r;
318 scratch_out[2].i = scratch[4].i - scratch[6].i;
321 scratch_out[0].r = scratch[4].r + scratch[6].r;
322 scratch_out[0].i = scratch[4].i + scratch[6].i;
325 scratch_out[1].r = scratch[5].r + scratch[7].i;
326 scratch_out[1].i = scratch[5].i - scratch[7].r;
329 scratch_out[3].r = scratch[5].r - scratch[7].i;
330 scratch_out[3].i = scratch[5].i + scratch[7].r;
333 *Fout1 = scratch_out[0];
334 Fout2 = Fout1 + mstride;
335 *Fout2 = scratch_out[1];
337 *Fout2 = scratch_out[2];
339 *Fout2 = scratch_out[3];
363 for (f_count = 0; f_count < fstride; f_count ++)
366 for (m_count = mstride; m_count ; m_count --)
369 scratch_tw[0] = *tw1;
371 scratch_tw[1] = *tw2;
373 scratch_tw[2] = *tw2;
374 scratch_in[0] = * Fin1;
376 scratch_in[1] = * Fin2;
378 scratch_in[2] = * Fin2;
380 scratch_in[3] = * Fin2;
384 scratch[0] = scratch_in[0];
385 scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
386 scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
388 scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
389 scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
391 scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
392 scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
395 scratch[4].r = scratch[0].r + scratch[2].r;
396 scratch[4].i = scratch[0].i + scratch[2].i;
398 scratch[5].r = scratch[0].r - scratch[2].r;
399 scratch[5].i = scratch[0].i - scratch[2].i;
402 scratch[6].r = scratch[1].r + scratch[3].r;
403 scratch[6].i = scratch[1].i + scratch[3].i;
405 scratch[7].r = scratch[1].r - scratch[3].r;
406 scratch[7].i = scratch[1].i - scratch[3].i;
409 scratch_out[2].r = scratch[4].r - scratch[6].r;
410 scratch_out[2].i = scratch[4].i - scratch[6].i;
413 scratch_out[0].r = scratch[4].r + scratch[6].r;
414 scratch_out[0].i = scratch[4].i + scratch[6].i;
417 scratch_out[1].r = scratch[5].r + scratch[7].i;
418 scratch_out[1].i = scratch[5].i - scratch[7].r;
421 scratch_out[3].r = scratch[5].r - scratch[7].i;
422 scratch_out[3].i = scratch[5].i + scratch[7].r;
425 *Fout1 = scratch_out[0];
427 *Fout2 = scratch_out[1];
429 *Fout2 = scratch_out[2];
431 *Fout2 = scratch_out[3];
443 ne10_int32_t * factors,
447 ne10_int32_t fstride, mstride, N;
448 ne10_int32_t fstride1;
449 ne10_int32_t f_count, m_count;
450 ne10_int32_t stage_count;
451 ne10_float32_t one_by_nfft;
462 const ne10_float32_t TW_81 = 0.70710678;
463 const ne10_float32_t TW_81N = -0.70710678;
466 stage_count = factors[0];
467 fstride = factors[1];
468 mstride = factors[ (stage_count << 1) - 1 ];
469 N = factors[ stage_count << 1 ];
470 one_by_nfft = (1.0f / (ne10_float32_t) (fstride * N));
480 fstride1 = fstride >> 2;
483 for (f_count = 0; f_count < fstride1; f_count ++)
485 Fout1 = & Fout[ f_count * 8 ];
487 scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
488 scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
489 scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
490 scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
491 scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
492 scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
493 scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
494 scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
495 scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
496 scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
497 scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
498 scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
499 scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
500 scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
501 scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
502 scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
506 scratch[0] = scratch_in[0];
507 scratch[1] = scratch_in[1];
509 scratch[2] = scratch_in[2];
510 scratch[3].r = (scratch_in[3].r - scratch_in[3].i) * TW_81;
511 scratch[3].i = (scratch_in[3].i + scratch_in[3].r) * TW_81;
513 scratch[4] = scratch_in[4];
514 scratch[5].r = -scratch_in[5].i;
515 scratch[5].i = scratch_in[5].r;
517 scratch[6].r = scratch_in[6].r;
518 scratch[6].i = scratch_in[6].i;
519 scratch[7].r = (scratch_in[7].r + scratch_in[7].i) * TW_81N;
520 scratch[7].i = (scratch_in[7].i - scratch_in[7].r) * TW_81N;
523 scratch[8].r = scratch[0].r + scratch[4].r;
524 scratch[8].i = scratch[0].i + scratch[4].i;
525 scratch[9].r = scratch[1].r + scratch[5].r;
526 scratch[9].i = scratch[1].i + scratch[5].i;
528 scratch[10].r = scratch[0].r - scratch[4].r;
529 scratch[10].i = scratch[0].i - scratch[4].i;
530 scratch[11].r = scratch[1].r - scratch[5].r;
531 scratch[11].i = scratch[1].i - scratch[5].i;
534 scratch[12].r = scratch[2].r + scratch[6].r;
535 scratch[12].i = scratch[2].i + scratch[6].i;
536 scratch[13].r = scratch[3].r + scratch[7].r;
537 scratch[13].i = scratch[3].i + scratch[7].i;
539 scratch[14].r = scratch[2].r - scratch[6].r;
540 scratch[14].i = scratch[2].i - scratch[6].i;
541 scratch[15].r = scratch[3].r - scratch[7].r;
542 scratch[15].i = scratch[3].i - scratch[7].i;
545 scratch_out[4].r = scratch[8].r - scratch[12].r;
546 scratch_out[4].i = scratch[8].i - scratch[12].i;
547 scratch_out[5].r = scratch[9].r - scratch[13].r;
548 scratch_out[5].i = scratch[9].i - scratch[13].i;
551 scratch_out[0].r = scratch[8].r + scratch[12].r;
552 scratch_out[0].i = scratch[8].i + scratch[12].i;
553 scratch_out[1].r = scratch[9].r + scratch[13].r;
554 scratch_out[1].i = scratch[9].i + scratch[13].i;
557 scratch_out[2].r = scratch[10].r - scratch[14].i;
558 scratch_out[2].i = scratch[10].i + scratch[14].r;
559 scratch_out[3].r = scratch[11].r - scratch[15].i;
560 scratch_out[3].i = scratch[11].i + scratch[15].r;
563 scratch_out[6].r = scratch[10].r + scratch[14].i;
564 scratch_out[6].i = scratch[10].i - scratch[14].r;
565 scratch_out[7].r = scratch[11].r + scratch[15].i;
566 scratch_out[7].i = scratch[11].i - scratch[15].r;
569 Fout1[0] = scratch_out[0];
570 Fout1[1] = scratch_out[1];
571 Fout1[2] = scratch_out[2];
572 Fout1[3] = scratch_out[3];
573 Fout1[4] = scratch_out[4];
574 Fout1[5] = scratch_out[5];
575 Fout1[6] = scratch_out[6];
576 Fout1[7] = scratch_out[7];
585 if (stage_count == 0)
587 for (f_count = 0; f_count < 8; f_count++)
589 Fout[f_count].r *= one_by_nfft;
590 Fout[f_count].i *= one_by_nfft;
602 for (f_count = fstride; f_count ; f_count --)
605 scratch_in[0] = *Fin1;
606 Fin2 = Fin1 + fstride;
607 scratch_in[1] = *Fin2;
608 Fin2 = Fin2 + fstride;
609 scratch_in[2] = *Fin2;
610 Fin2 = Fin2 + fstride;
611 scratch_in[3] = *Fin2;
616 scratch[0].r = scratch_in[0].r + scratch_in[2].r;
617 scratch[0].i = scratch_in[0].i + scratch_in[2].i;
619 scratch[1].r = scratch_in[0].r - scratch_in[2].r;
620 scratch[1].i = scratch_in[0].i - scratch_in[2].i;
623 scratch[2].r = scratch_in[1].r + scratch_in[3].r;
624 scratch[2].i = scratch_in[1].i + scratch_in[3].i;
626 scratch[3].r = scratch_in[1].r - scratch_in[3].r;
627 scratch[3].i = scratch_in[1].i - scratch_in[3].i;
630 scratch_out[2].r = scratch[0].r - scratch[2].r;
631 scratch_out[2].i = scratch[0].i - scratch[2].i;
634 scratch_out[0].r = scratch[0].r + scratch[2].r;
635 scratch_out[0].i = scratch[0].i + scratch[2].i;
638 scratch_out[1].r = scratch[1].r - scratch[3].i;
639 scratch_out[1].i = scratch[1].i + scratch[3].r;
642 scratch_out[3].r = scratch[1].r + scratch[3].i;
643 scratch_out[3].i = scratch[1].i - scratch[3].r;
646 * Fout1 ++ = scratch_out[0];
647 * Fout1 ++ = scratch_out[1];
648 * Fout1 ++ = scratch_out[2];
649 * Fout1 ++ = scratch_out[3];
660 if (stage_count == 0)
662 for (f_count = 0; f_count < 4; f_count++)
664 Fout[f_count].r *= one_by_nfft;
665 Fout[f_count].i *= one_by_nfft;
677 for (; stage_count > 1 ; stage_count--)
680 for (f_count = 0; f_count < fstride; f_count ++)
682 Fout1 = & Fout[ f_count * mstride << 2 ];
684 for (m_count = mstride; m_count ; m_count --)
687 scratch_tw[0] = *tw1;
689 scratch_tw[1] = *tw2;
691 scratch_tw[2] = *tw2;
692 scratch_in[0] = * Fin1;
694 scratch_in[1] = * Fin2;
696 scratch_in[2] = * Fin2;
698 scratch_in[3] = * Fin2;
702 scratch[0] = scratch_in[0];
703 scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i;
704 scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i;
706 scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i;
707 scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i;
709 scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i;
710 scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i;
713 scratch[4].r = scratch[0].r + scratch[2].r;
714 scratch[4].i = scratch[0].i + scratch[2].i;
716 scratch[5].r = scratch[0].r - scratch[2].r;
717 scratch[5].i = scratch[0].i - scratch[2].i;
720 scratch[6].r = scratch[1].r + scratch[3].r;
721 scratch[6].i = scratch[1].i + scratch[3].i;
723 scratch[7].r = scratch[1].r - scratch[3].r;
724 scratch[7].i = scratch[1].i - scratch[3].i;
727 scratch_out[2].r = scratch[4].r - scratch[6].r;
728 scratch_out[2].i = scratch[4].i - scratch[6].i;
731 scratch_out[0].r = scratch[4].r + scratch[6].r;
732 scratch_out[0].i = scratch[4].i + scratch[6].i;
735 scratch_out[1].r = scratch[5].r - scratch[7].i;
736 scratch_out[1].i = scratch[5].i + scratch[7].r;
739 scratch_out[3].r = scratch[5].r + scratch[7].i;
740 scratch_out[3].i = scratch[5].i - scratch[7].r;
743 *Fout1 = scratch_out[0];
744 Fout2 = Fout1 + mstride;
745 *Fout2 = scratch_out[1];
747 *Fout2 = scratch_out[2];
749 *Fout2 = scratch_out[3];
773 for (f_count = 0; f_count < fstride; f_count ++)
776 for (m_count = mstride; m_count ; m_count --)
779 scratch_tw[0] = *tw1;
781 scratch_tw[1] = *tw2;
783 scratch_tw[2] = *tw2;
784 scratch_in[0] = * Fin1;
786 scratch_in[1] = * Fin2;
788 scratch_in[2] = * Fin2;
790 scratch_in[3] = * Fin2;
794 scratch[0] = scratch_in[0];
795 scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i;
796 scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i;
798 scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i;
799 scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i;
801 scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i;
802 scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i;
805 scratch[4].r = scratch[0].r + scratch[2].r;
806 scratch[4].i = scratch[0].i + scratch[2].i;
808 scratch[5].r = scratch[0].r - scratch[2].r;
809 scratch[5].i = scratch[0].i - scratch[2].i;
812 scratch[6].r = scratch[1].r + scratch[3].r;
813 scratch[6].i = scratch[1].i + scratch[3].i;
815 scratch[7].r = scratch[1].r - scratch[3].r;
816 scratch[7].i = scratch[1].i - scratch[3].i;
819 scratch_out[2].r = (scratch[4].r - scratch[6].r) * one_by_nfft;
820 scratch_out[2].i = (scratch[4].i - scratch[6].i) * one_by_nfft;
823 scratch_out[0].r = (scratch[4].r + scratch[6].r) * one_by_nfft;
824 scratch_out[0].i = (scratch[4].i + scratch[6].i) * one_by_nfft;
827 scratch_out[1].r = (scratch[5].r - scratch[7].i) * one_by_nfft;
828 scratch_out[1].i = (scratch[5].i + scratch[7].r) * one_by_nfft;
831 scratch_out[3].r = (scratch[5].r + scratch[7].i) * one_by_nfft;
832 scratch_out[3].i = (scratch[5].i - scratch[7].r) * one_by_nfft;
835 *Fout1 = scratch_out[0];
837 *Fout2 = scratch_out[1];
839 *Fout2 = scratch_out[2];
841 *Fout2 = scratch_out[3];
862 dst[0].r = tdc.r + tdc.i;
863 dst[ncfft].r = tdc.r - tdc.i;
864 dst[ncfft].i = dst[0].i = 0;
866 for (k = 1; k <= ncfft / 2 ; ++k)
869 fpnk.r = src[ncfft - k].r;
870 fpnk.i = - src[ncfft - k].i;
872 f1k.r = fpk.r + fpnk.r;
873 f1k.i = fpk.i + fpnk.i;
875 f2k.r = fpk.r - fpnk.r;
876 f2k.i = fpk.i - fpnk.i;
878 tw.r = f2k.r * (twiddles[k - 1]).r - f2k.i * (twiddles[k - 1]).i;
879 tw.i = f2k.r * (twiddles[k - 1]).i + f2k.i * (twiddles[k - 1]).r;
881 dst[k].r = (f1k.r + tw.r) * 0.5f;
882 dst[k].i = (f1k.i + tw.i) * 0.5f;
883 dst[ncfft - k].r = (f1k.r - tw.r) * 0.5f;
884 dst[ncfft - k].i = (tw.i - f1k.i) * 0.5f;
898 dst[0].r = (src[0].r + src[ncfft].r) * 0.5f;
899 dst[0].i = (src[0].r - src[ncfft].r) * 0.5f;
901 for (k = 1; k <= ncfft / 2; k++)
904 fnkc.r = src[ncfft - k].r;
905 fnkc.i = -src[ncfft - k].i;
907 fek.r = fk.r + fnkc.r;
908 fek.i = fk.i + fnkc.i;
910 tmp.r = fk.r - fnkc.r;
911 tmp.i = fk.i - fnkc.i;
913 fok.r = tmp.r * twiddles[k - 1].r + tmp.i * twiddles[k - 1].i;
914 fok.i = tmp.i * twiddles[k - 1].r - tmp.r * twiddles[k - 1].i;
916 dst[k].r = (fek.r + fok.r) * 0.5f;
917 dst[k].i = (fek.i + fok.i) * 0.5f;
919 dst[ncfft - k].r = (fek.r - fok.r) * 0.5f;
920 dst[ncfft - k].i = (fok.i - fek.i) * 0.5f;
1001 +
sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2)
1004 + NE10_FFT_BYTE_ALIGNMENT;
1018 NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
1019 st->factors = (ne10_int32_t*) address;
1021 st->buffer = st->twiddles + nfft;
1024 ne10_int32_t result = ne10_factor (nfft, st->factors, NE10_FACTOR_DEFAULT);
1025 if (result == NE10_ERR)
1033 ne10_int32_t stage_count = st->factors[0];
1034 ne10_int32_t algorithm_flag = st->factors[2 * (stage_count + 1)];
1037 if (algorithm_flag == NE10_FFT_ALG_ANY)
1039 result = ne10_factor (st->nfft, st->factors, NE10_FACTOR_EIGHT);
1040 if (result == NE10_ERR)
1049 ne10_fft_generate_twiddles_float32 (st->twiddles, st->factors, nfft);
1068 ne10_int32_t inverse_fft)
1070 ne10_int32_t stage_count = cfg->factors[0];
1071 ne10_int32_t algorithm_flag = cfg->factors[2 * (stage_count + 1)];
1073 assert ((algorithm_flag == NE10_FFT_ALG_24)
1074 || (algorithm_flag == NE10_FFT_ALG_ANY));
1076 switch (algorithm_flag)
1078 case NE10_FFT_ALG_24:
1081 ne10_mixed_radix_butterfly_inverse_float32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
1085 ne10_mixed_radix_butterfly_float32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
1088 case NE10_FFT_ALG_ANY:
1091 ne10_mixed_radix_generic_butterfly_inverse_float32_c (fout, fin,
1096 ne10_mixed_radix_generic_butterfly_float32_c (fout, fin,
1185 #if (NE10_UNROLL_LEVEL == 0)
1196 ne10_int32_t ncfft = nfft >> 1;
1199 +
sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2)
1203 + NE10_FFT_BYTE_ALIGNMENT;
1210 NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
1211 st->factors = (ne10_int32_t*) address;
1213 st->super_twiddles = st->twiddles + ncfft;
1214 st->buffer = st->super_twiddles + (ncfft / 2);
1217 ne10_int32_t result = ne10_factor (ncfft, st->factors, NE10_FACTOR_DEFAULT);
1218 if (result == NE10_ERR)
1225 ne10_int32_t *factors = st->factors;
1228 ne10_int32_t stage_count = factors[0];
1229 ne10_int32_t fstride1 = factors[1];
1230 ne10_int32_t fstride2 = fstride1 * 2;
1231 ne10_int32_t fstride3 = fstride1 * 3;
1234 const ne10_float32_t pi = NE10_PI;
1235 ne10_float32_t phase1;
1236 ne10_float32_t phase2;
1237 ne10_float32_t phase3;
1239 for (i = stage_count - 1; i > 0; i--)
1244 m = factors[2 * i + 1];
1246 for (j = 0; j < m; j++)
1248 phase1 = -2 * pi * fstride1 * j / ncfft;
1249 phase2 = -2 * pi * fstride2 * j / ncfft;
1250 phase3 = -2 * pi * fstride3 * j / ncfft;
1251 tw->r = (ne10_float32_t) cos (phase1);
1252 tw->i = (ne10_float32_t) sin (phase1);
1253 (tw + m)->r = (ne10_float32_t) cos (phase2);
1254 (tw + m)->i = (ne10_float32_t) sin (phase2);
1255 (tw + m * 2)->r = (ne10_float32_t) cos (phase3);
1256 (tw + m * 2)->i = (ne10_float32_t) sin (phase3);
1262 tw = st->super_twiddles;
1263 for (i = 0; i < ncfft / 2; i++)
1265 phase1 = -pi * ( (ne10_float32_t) (i + 1) / ncfft + 0.5f);
1266 tw->r = (ne10_float32_t) cos (phase1);
1267 tw->i = (ne10_float32_t) sin (phase1);
1286 ne10_float32_t *fin,
1291 ne10_mixed_radix_butterfly_float32_c (tmpbuf, (
ne10_fft_cpx_float32_t*) fin, cfg->factors, cfg->twiddles, fout);
1292 ne10_fft_split_r2c_1d_float32 (fout, tmpbuf, cfg->super_twiddles, cfg->ncfft);
1312 ne10_fft_split_c2r_1d_float32 (tmpbuf1, fin, cfg->super_twiddles, cfg->ncfft);
1313 ne10_mixed_radix_butterfly_inverse_float32_c ( (
ne10_fft_cpx_float32_t*) fout, tmpbuf1, cfg->factors, cfg->twiddles, tmpbuf2);
void ne10_fft_c2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Mixed radix-2/3/4/5 complex FFT/IFFT of float(32-bit) data.
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
void ne10_fft_r2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 FFT (real to complex) of float(32-bit) data.
void ne10_fft_c2r_1d_float32_c(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 IFFT (complex to real) of float(32-bit) data.
structure for the floating point FFT state
ne10_int32_t is_forward_scaled
@brief Flag to control scaling behaviour in forward floating point complex FFT.
ne10_int32_t is_backward_scaled
@brief Flag to control scaling behaviour in backward floating point complex FFT.