/*
 * Project Ne10
 * An Open Optimized Software Library Project for the ARM Architecture
 * NE10_fft_int16.c
 */
1 /*
2  * Copyright 2013-15 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /* license of Kiss FFT */
29 /*
30 Copyright (c) 2003-2010, Mark Borgerding
31 
32 All rights reserved.
33 
34 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35 
36  * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37  * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38  * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42 
43 /*
44  * NE10 Library : dsp/NE10_fft_int16.c
45  */
46 
47 #include "NE10_types.h"
48 #include "NE10_macros.h"
49 #include "NE10_fft.h"
50 
51 
52 static void ne10_mixed_radix_butterfly_int16_c (ne10_fft_cpx_int16_t * Fout,
54  ne10_int32_t * factors,
55  ne10_fft_cpx_int16_t * twiddles,
56  ne10_fft_cpx_int16_t * buffer,
57  ne10_int32_t scaled_flag)
58 {
59  ne10_int32_t fstride, mstride, N;
60  ne10_int32_t fstride1;
61  ne10_int32_t f_count, m_count;
62  ne10_int32_t stage_count;
63 
64  ne10_fft_cpx_int16_t scratch_in[8];
65  ne10_fft_cpx_int16_t scratch_out[8];
66  ne10_fft_cpx_int16_t scratch[16];
67  ne10_fft_cpx_int16_t scratch_tw[6];
68 
69  ne10_fft_cpx_int16_t *Fin1, *Fin2, *Fout1, *Fout2;
70  ne10_fft_cpx_int16_t *Fout_ls = Fout;
72  ne10_fft_cpx_int16_t *tw, *tw1, *tw2;
73  const ne10_int32_t TW_81 = 23169;
74  const ne10_int32_t TW_81N = -23169;
75 
76 
77 
78  // init fstride, mstride, N, tw
79  stage_count = factors[0];
80  fstride = factors[1];
81  mstride = factors[ (stage_count << 1) - 1 ];
82  N = factors[ stage_count << 1 ]; // radix
83  tw = twiddles;
84 
85  // the first stage
86  Fin1 = Fin;
87  Fout1 = Fout;
88  if (N == 2) // length of FFT is 2^n (n is odd)
89  {
90  // radix 8
91  N = fstride >> 1; // 1/4 of length of FFT
92  fstride1 = fstride >> 2;
93 
94  Fin1 = Fin;
95  for (f_count = 0; f_count < fstride1; f_count ++)
96  {
97  Fout1 = & Fout[ f_count * 8 ];
98  // load
99  if (scaled_flag == 1)
100  {
101  NE10_F2I16_FIXDIV (Fin1[0], 8);
102  NE10_F2I16_FIXDIV (Fin1[0 + fstride], 8);
103  NE10_F2I16_FIXDIV (Fin1[fstride1], 8);
104  NE10_F2I16_FIXDIV (Fin1[fstride1 + fstride], 8);
105  NE10_F2I16_FIXDIV (Fin1[fstride1 * 2], 8);
106  NE10_F2I16_FIXDIV (Fin1[fstride1 * 2 + fstride], 8);
107  NE10_F2I16_FIXDIV (Fin1[fstride1 * 3], 8);
108  NE10_F2I16_FIXDIV (Fin1[fstride1 * 3 + fstride], 8);
109  }
110  scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
111  scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
112  scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
113  scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
114  scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
115  scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
116  scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
117  scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
118  scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
119  scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
120  scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
121  scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
122  scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
123  scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
124  scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
125  scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
126 
127  // radix 4 butterfly without twiddles
128  scratch[0] = scratch_in[0];
129  scratch[1] = scratch_in[1];
130 
131  scratch[2] = scratch_in[2];
132  scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].r + scratch_in[3].i) * TW_81) >> NE10_F2I16_SHIFT);
133  scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].i - scratch_in[3].r) * TW_81) >> NE10_F2I16_SHIFT);
134 
135  scratch[4] = scratch_in[4];
136  scratch[5].r = scratch_in[5].i;
137  scratch[5].i = -scratch_in[5].r;
138 
139  scratch[6].r = scratch_in[6].r;
140  scratch[6].i = scratch_in[6].i;
141  scratch[7].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].r - scratch_in[7].i) * TW_81N) >> NE10_F2I16_SHIFT);
142  scratch[7].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].i + scratch_in[7].r) * TW_81N) >> NE10_F2I16_SHIFT);
143 
144  // radix 2 butterfly
145  scratch[8].r = scratch[0].r + scratch[4].r;
146  scratch[8].i = scratch[0].i + scratch[4].i;
147  scratch[9].r = scratch[1].r + scratch[5].r;
148  scratch[9].i = scratch[1].i + scratch[5].i;
149 
150  scratch[10].r = scratch[0].r - scratch[4].r;
151  scratch[10].i = scratch[0].i - scratch[4].i;
152  scratch[11].r = scratch[1].r - scratch[5].r;
153  scratch[11].i = scratch[1].i - scratch[5].i;
154 
155  // radix 2 butterfly
156  scratch[12].r = scratch[2].r + scratch[6].r;
157  scratch[12].i = scratch[2].i + scratch[6].i;
158  scratch[13].r = scratch[3].r + scratch[7].r;
159  scratch[13].i = scratch[3].i + scratch[7].i;
160 
161  scratch[14].r = scratch[2].r - scratch[6].r;
162  scratch[14].i = scratch[2].i - scratch[6].i;
163  scratch[15].r = scratch[3].r - scratch[7].r;
164  scratch[15].i = scratch[3].i - scratch[7].i;
165 
166  // third result
167  scratch_out[4].r = scratch[8].r - scratch[12].r;
168  scratch_out[4].i = scratch[8].i - scratch[12].i;
169  scratch_out[5].r = scratch[9].r - scratch[13].r;
170  scratch_out[5].i = scratch[9].i - scratch[13].i;
171 
172  // first result
173  scratch_out[0].r = scratch[8].r + scratch[12].r;
174  scratch_out[0].i = scratch[8].i + scratch[12].i;
175  scratch_out[1].r = scratch[9].r + scratch[13].r;
176  scratch_out[1].i = scratch[9].i + scratch[13].i;
177 
178  // second result
179  scratch_out[2].r = scratch[10].r + scratch[14].i;
180  scratch_out[2].i = scratch[10].i - scratch[14].r;
181  scratch_out[3].r = scratch[11].r + scratch[15].i;
182  scratch_out[3].i = scratch[11].i - scratch[15].r;
183 
184  // forth result
185  scratch_out[6].r = scratch[10].r - scratch[14].i;
186  scratch_out[6].i = scratch[10].i + scratch[14].r;
187  scratch_out[7].r = scratch[11].r - scratch[15].i;
188  scratch_out[7].i = scratch[11].i + scratch[15].r;
189 
190  // store
191  Fout1[0] = scratch_out[0];
192  Fout1[1] = scratch_out[1];
193  Fout1[2] = scratch_out[2];
194  Fout1[3] = scratch_out[3];
195  Fout1[4] = scratch_out[4];
196  Fout1[5] = scratch_out[5];
197  Fout1[6] = scratch_out[6];
198  Fout1[7] = scratch_out[7];
199 
200  Fin1 += 1;
201  } // f_count
202  tw += 6;
203  mstride <<= 2;
204  fstride >>= 4;
205  stage_count -= 2;
206 
207  // swap
208  Ftmp = buffer;
209  buffer = Fout;
210  Fout = Ftmp;
211  }
212  else if (N == 4) // length of FFT is 2^n (n is even)
213  {
214  //fstride is nfft>>2
215  for (f_count = fstride; f_count ; f_count --)
216  {
217  // load
218  scratch_in[0] = *Fin1;
219  Fin2 = Fin1 + fstride;
220  scratch_in[1] = *Fin2;
221  Fin2 = Fin2 + fstride;
222  scratch_in[2] = *Fin2;
223  Fin2 = Fin2 + fstride;
224  scratch_in[3] = *Fin2;
225 
226  // radix 4 butterfly without twiddles
227  if (scaled_flag == 1)
228  {
229  NE10_F2I16_FIXDIV (scratch_in[0], 4);
230  NE10_F2I16_FIXDIV (scratch_in[1], 4);
231  NE10_F2I16_FIXDIV (scratch_in[2], 4);
232  NE10_F2I16_FIXDIV (scratch_in[3], 4);
233  }
234 
235  // radix 2 butterfly
236  scratch[0].r = scratch_in[0].r + scratch_in[2].r;
237  scratch[0].i = scratch_in[0].i + scratch_in[2].i;
238 
239  scratch[1].r = scratch_in[0].r - scratch_in[2].r;
240  scratch[1].i = scratch_in[0].i - scratch_in[2].i;
241 
242  // radix 2 butterfly
243  scratch[2].r = scratch_in[1].r + scratch_in[3].r;
244  scratch[2].i = scratch_in[1].i + scratch_in[3].i;
245 
246  scratch[3].r = scratch_in[1].r - scratch_in[3].r;
247  scratch[3].i = scratch_in[1].i - scratch_in[3].i;
248 
249  // third result
250  scratch_out[2].r = scratch[0].r - scratch[2].r;
251  scratch_out[2].i = scratch[0].i - scratch[2].i;
252 
253  // first result
254  scratch_out[0].r = scratch[0].r + scratch[2].r;
255  scratch_out[0].i = scratch[0].i + scratch[2].i;
256 
257  // second result
258  scratch_out[1].r = scratch[1].r + scratch[3].i;
259  scratch_out[1].i = scratch[1].i - scratch[3].r;
260 
261  // forth result
262  scratch_out[3].r = scratch[1].r - scratch[3].i;
263  scratch_out[3].i = scratch[1].i + scratch[3].r;
264 
265  // store
266  * Fout1 ++ = scratch_out[0];
267  * Fout1 ++ = scratch_out[1];
268  * Fout1 ++ = scratch_out[2];
269  * Fout1 ++ = scratch_out[3];
270 
271  Fin1++;
272  } // f_count
273 
274  N = fstride; // 1/4 of length of FFT
275 
276  // swap
277  Ftmp = buffer;
278  buffer = Fout;
279  Fout = Ftmp;
280 
281  // update address for other stages
282  stage_count--;
283  fstride >>= 2;
284  // end of first stage
285  }
286 
287 
288  // others but the last one
289  for (; stage_count > 1 ; stage_count--)
290  {
291  Fin1 = buffer;
292  for (f_count = 0; f_count < fstride; f_count ++)
293  {
294  Fout1 = & Fout[ f_count * mstride << 2 ];
295  tw1 = tw;
296  for (m_count = mstride; m_count ; m_count --)
297  {
298  // load
299  scratch_tw[0] = *tw1;
300  tw2 = tw1 + mstride;
301  scratch_tw[1] = *tw2;
302  tw2 += mstride;
303  scratch_tw[2] = *tw2;
304  scratch_in[0] = * Fin1;
305  Fin2 = Fin1 + N;
306  scratch_in[1] = * Fin2;
307  Fin2 += N;
308  scratch_in[2] = * Fin2;
309  Fin2 += N;
310  scratch_in[3] = * Fin2;
311  if (scaled_flag == 1)
312  {
313  NE10_F2I16_FIXDIV (scratch_in[0], 4);
314  NE10_F2I16_FIXDIV (scratch_in[1], 4);
315  NE10_F2I16_FIXDIV (scratch_in[2], 4);
316  NE10_F2I16_FIXDIV (scratch_in[3], 4);
317  }
318 
319  // radix 4 butterfly with twiddles
320 
321  scratch[0] = scratch_in[0];
322  scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
323  - (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
324  scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
325  + (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
326 
327  scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
328  - (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
329  scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
330  + (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
331 
332  scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
333  - (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
334  scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
335  + (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
336 
337  // radix 2 butterfly
338  scratch[4].r = scratch[0].r + scratch[2].r;
339  scratch[4].i = scratch[0].i + scratch[2].i;
340 
341  scratch[5].r = scratch[0].r - scratch[2].r;
342  scratch[5].i = scratch[0].i - scratch[2].i;
343 
344  // radix 2 butterfly
345  scratch[6].r = scratch[1].r + scratch[3].r;
346  scratch[6].i = scratch[1].i + scratch[3].i;
347 
348  scratch[7].r = scratch[1].r - scratch[3].r;
349  scratch[7].i = scratch[1].i - scratch[3].i;
350 
351  // third result
352  scratch_out[2].r = scratch[4].r - scratch[6].r;
353  scratch_out[2].i = scratch[4].i - scratch[6].i;
354 
355  // first result
356  scratch_out[0].r = scratch[4].r + scratch[6].r;
357  scratch_out[0].i = scratch[4].i + scratch[6].i;
358 
359  // second result
360  scratch_out[1].r = scratch[5].r + scratch[7].i;
361  scratch_out[1].i = scratch[5].i - scratch[7].r;
362 
363  // forth result
364  scratch_out[3].r = scratch[5].r - scratch[7].i;
365  scratch_out[3].i = scratch[5].i + scratch[7].r;
366 
367  // store
368  *Fout1 = scratch_out[0];
369  Fout2 = Fout1 + mstride;
370  *Fout2 = scratch_out[1];
371  Fout2 += mstride;
372  *Fout2 = scratch_out[2];
373  Fout2 += mstride;
374  *Fout2 = scratch_out[3];
375 
376  tw1++;
377  Fin1 ++;
378  Fout1 ++;
379  } // m_count
380  } // f_count
381  tw += mstride * 3;
382  mstride <<= 2;
383  // swap
384  Ftmp = buffer;
385  buffer = Fout;
386  Fout = Ftmp;
387  fstride >>= 2;
388  } // stage_count
389 
390  // the last one
391  if (stage_count)
392  {
393  Fin1 = buffer;
394  // if stage count is even, output to the input array
395  Fout1 = Fout_ls;
396 
397  for (f_count = 0; f_count < fstride; f_count ++)
398  {
399  tw1 = tw;
400  for (m_count = mstride; m_count ; m_count --)
401  {
402  // load
403  scratch_tw[0] = *tw1;
404  tw2 = tw1 + mstride;
405  scratch_tw[1] = *tw2;
406  tw2 += mstride;
407  scratch_tw[2] = *tw2;
408  scratch_in[0] = * Fin1;
409  Fin2 = Fin1 + N;
410  scratch_in[1] = * Fin2;
411  Fin2 += N;
412  scratch_in[2] = * Fin2;
413  Fin2 += N;
414  scratch_in[3] = * Fin2;
415  if (scaled_flag == 1)
416  {
417  NE10_F2I16_FIXDIV (scratch_in[0], 4);
418  NE10_F2I16_FIXDIV (scratch_in[1], 4);
419  NE10_F2I16_FIXDIV (scratch_in[2], 4);
420  NE10_F2I16_FIXDIV (scratch_in[3], 4);
421  }
422 
423  // radix 4 butterfly with twiddles
424 
425  scratch[0] = scratch_in[0];
426  scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
427  - (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
428  scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
429  + (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
430 
431  scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
432  - (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
433  scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
434  + (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
435 
436  scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
437  - (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
438  scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
439  + (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
440 
441  // radix 2 butterfly
442  scratch[4].r = scratch[0].r + scratch[2].r;
443  scratch[4].i = scratch[0].i + scratch[2].i;
444 
445  scratch[5].r = scratch[0].r - scratch[2].r;
446  scratch[5].i = scratch[0].i - scratch[2].i;
447 
448  // radix 2 butterfly
449  scratch[6].r = scratch[1].r + scratch[3].r;
450  scratch[6].i = scratch[1].i + scratch[3].i;
451 
452  scratch[7].r = scratch[1].r - scratch[3].r;
453  scratch[7].i = scratch[1].i - scratch[3].i;
454 
455  // third result
456  scratch_out[2].r = scratch[4].r - scratch[6].r;
457  scratch_out[2].i = scratch[4].i - scratch[6].i;
458 
459  // first result
460  scratch_out[0].r = scratch[4].r + scratch[6].r;
461  scratch_out[0].i = scratch[4].i + scratch[6].i;
462 
463  // second result
464  scratch_out[1].r = scratch[5].r + scratch[7].i;
465  scratch_out[1].i = scratch[5].i - scratch[7].r;
466 
467  // forth result
468  scratch_out[3].r = scratch[5].r - scratch[7].i;
469  scratch_out[3].i = scratch[5].i + scratch[7].r;
470 
471  // store
472  *Fout1 = scratch_out[0];
473  Fout2 = Fout1 + N;
474  *Fout2 = scratch_out[1];
475  Fout2 += N;
476  *Fout2 = scratch_out[2];
477  Fout2 += N;
478  *Fout2 = scratch_out[3];
479 
480  tw1 ++;
481  Fin1 ++;
482  Fout1 ++;
483  } // m_count
484  } // f_count
485  } // last stage
486 }
487 
/*
 * Inverse mixed-radix FFT butterfly kernel, Q15 fixed point.
 *
 * Mirror of ne10_mixed_radix_butterfly_int16_c with conjugated twiddle
 * rotations (the imaginary cross-terms flip sign relative to the forward
 * kernel). Parameters:
 *
 * Fout        : output buffer (nfft complex int16 samples)
 * Fin         : input buffer (nfft complex int16 samples)
 * factors     : stage table; factors[0] = stage count, factors[1] = first
 *               stage fstride, remaining entries hold per-stage values
 * twiddles    : precomputed twiddle factors, consumed stage by stage
 * buffer      : scratch buffer of nfft complex samples, ping-ponged with
 *               Fout between stages
 * scaled_flag : when 1, each stage pre-scales its inputs with
 *               NE10_F2I16_FIXDIV to avoid Q15 overflow
 */
static void ne10_mixed_radix_butterfly_inverse_int16_c (ne10_fft_cpx_int16_t * Fout,
        ne10_fft_cpx_int16_t * Fin,
        ne10_int32_t * factors,
        ne10_fft_cpx_int16_t * twiddles,
        ne10_fft_cpx_int16_t * buffer,
        ne10_int32_t scaled_flag)
{
    ne10_int32_t fstride, mstride, N;
    ne10_int32_t fstride1;
    ne10_int32_t f_count, m_count;
    ne10_int32_t stage_count;

    ne10_fft_cpx_int16_t scratch_in[8];
    ne10_fft_cpx_int16_t scratch_out[8];
    ne10_fft_cpx_int16_t scratch[16];
    ne10_fft_cpx_int16_t scratch_tw[6];

    ne10_fft_cpx_int16_t *Fin1, *Fin2, *Fout1, *Fout2;
    ne10_fft_cpx_int16_t *Fout_ls = Fout; // caller-visible output for the last stage
    ne10_fft_cpx_int16_t *Ftmp;           // used only for the buffer/Fout swap
    ne10_fft_cpx_int16_t *tw, *tw1, *tw2;
    const ne10_int32_t TW_81 = 23169;   // Q15 value of sqrt(2)/2 (23169/32768)
    const ne10_int32_t TW_81N = -23169; // Q15 value of -sqrt(2)/2

    // init fstride, mstride, N
    stage_count = factors[0];
    fstride = factors[1];
    mstride = factors[ (stage_count << 1) - 1 ];
    N = factors[ stage_count << 1 ]; // radix
    tw = twiddles;

    // the first stage
    Fin1 = Fin;
    Fout1 = Fout;
    if (N == 2) // length of FFT is 2^n (n is odd)
    {
        // radix 8
        N = fstride >> 1; // 1/4 of length of FFT
        fstride1 = fstride >> 2;

        Fin1 = Fin;
        for (f_count = 0; f_count < fstride1; f_count ++)
        {
            Fout1 = & Fout[ f_count * 8 ];
            // load; pre-scale by 8 to keep the radix-8 sums in Q15 range
            if (scaled_flag == 1)
            {
                NE10_F2I16_FIXDIV (Fin1[0], 8);
                NE10_F2I16_FIXDIV (Fin1[0 + fstride], 8);
                NE10_F2I16_FIXDIV (Fin1[fstride1], 8);
                NE10_F2I16_FIXDIV (Fin1[fstride1 + fstride], 8);
                NE10_F2I16_FIXDIV (Fin1[fstride1 * 2], 8);
                NE10_F2I16_FIXDIV (Fin1[fstride1 * 2 + fstride], 8);
                NE10_F2I16_FIXDIV (Fin1[fstride1 * 3], 8);
                NE10_F2I16_FIXDIV (Fin1[fstride1 * 3 + fstride], 8);
            }

            // radix-2 combine of the 8 strided inputs
            scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
            scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
            scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
            scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
            scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
            scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
            scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
            scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
            scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
            scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
            scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
            scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
            scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
            scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
            scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
            scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;

            // radix 4 butterfly without twiddles (fixed conjugate rotations
            // on the odd terms; sign-mirrored vs the forward kernel)

            scratch[0] = scratch_in[0];
            scratch[1] = scratch_in[1];

            scratch[2] = scratch_in[2];
            // multiply by e^{+j*pi/4}
            scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].r - scratch_in[3].i) * TW_81) >> NE10_F2I16_SHIFT);
            scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].i + scratch_in[3].r) * TW_81) >> NE10_F2I16_SHIFT);

            scratch[4] = scratch_in[4];
            // multiply by +j
            scratch[5].r = -scratch_in[5].i;
            scratch[5].i = scratch_in[5].r;

            scratch[6].r = scratch_in[6].r;
            scratch[6].i = scratch_in[6].i;
            // multiply by e^{+j*3pi/4}
            scratch[7].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].r + scratch_in[7].i) * TW_81N) >> NE10_F2I16_SHIFT);
            scratch[7].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].i - scratch_in[7].r) * TW_81N) >> NE10_F2I16_SHIFT);

            // radix 2 butterfly
            scratch[8].r = scratch[0].r + scratch[4].r;
            scratch[8].i = scratch[0].i + scratch[4].i;
            scratch[9].r = scratch[1].r + scratch[5].r;
            scratch[9].i = scratch[1].i + scratch[5].i;

            scratch[10].r = scratch[0].r - scratch[4].r;
            scratch[10].i = scratch[0].i - scratch[4].i;
            scratch[11].r = scratch[1].r - scratch[5].r;
            scratch[11].i = scratch[1].i - scratch[5].i;

            // radix 2 butterfly
            scratch[12].r = scratch[2].r + scratch[6].r;
            scratch[12].i = scratch[2].i + scratch[6].i;
            scratch[13].r = scratch[3].r + scratch[7].r;
            scratch[13].i = scratch[3].i + scratch[7].i;

            scratch[14].r = scratch[2].r - scratch[6].r;
            scratch[14].i = scratch[2].i - scratch[6].i;
            scratch[15].r = scratch[3].r - scratch[7].r;
            scratch[15].i = scratch[3].i - scratch[7].i;

            // third result
            scratch_out[4].r = scratch[8].r - scratch[12].r;
            scratch_out[4].i = scratch[8].i - scratch[12].i;
            scratch_out[5].r = scratch[9].r - scratch[13].r;
            scratch_out[5].i = scratch[9].i - scratch[13].i;

            // first result
            scratch_out[0].r = scratch[8].r + scratch[12].r;
            scratch_out[0].i = scratch[8].i + scratch[12].i;
            scratch_out[1].r = scratch[9].r + scratch[13].r;
            scratch_out[1].i = scratch[9].i + scratch[13].i;

            // second result (cross terms rotated by +j, inverse direction)
            scratch_out[2].r = scratch[10].r - scratch[14].i;
            scratch_out[2].i = scratch[10].i + scratch[14].r;
            scratch_out[3].r = scratch[11].r - scratch[15].i;
            scratch_out[3].i = scratch[11].i + scratch[15].r;

            // forth result
            scratch_out[6].r = scratch[10].r + scratch[14].i;
            scratch_out[6].i = scratch[10].i - scratch[14].r;
            scratch_out[7].r = scratch[11].r + scratch[15].i;
            scratch_out[7].i = scratch[11].i - scratch[15].r;

            // store
            Fout1[0] = scratch_out[0];
            Fout1[1] = scratch_out[1];
            Fout1[2] = scratch_out[2];
            Fout1[3] = scratch_out[3];
            Fout1[4] = scratch_out[4];
            Fout1[5] = scratch_out[5];
            Fout1[6] = scratch_out[6];
            Fout1[7] = scratch_out[7];

            Fin1 += 1;
        } // f_count
        // a radix-8 pass consumes two radix-2/4 stages of the factor table
        tw += 6;
        mstride <<= 2;
        fstride >>= 4;
        stage_count -= 2;

        // swap
        Ftmp = buffer;
        buffer = Fout;
        Fout = Ftmp;
    }
    else if (N == 4) // length of FFT is 2^n (n is even)
    {
        //fstride is nfft>>2
        for (f_count = fstride; f_count ; f_count --)
        {
            // load the 4 inputs, fstride apart
            scratch_in[0] = *Fin1;
            Fin2 = Fin1 + fstride;
            scratch_in[1] = *Fin2;
            Fin2 = Fin2 + fstride;
            scratch_in[2] = *Fin2;
            Fin2 = Fin2 + fstride;
            scratch_in[3] = *Fin2;

            // radix 4 butterfly without twiddles
            if (scaled_flag == 1)
            {
                NE10_F2I16_FIXDIV (scratch_in[0], 4);
                NE10_F2I16_FIXDIV (scratch_in[1], 4);
                NE10_F2I16_FIXDIV (scratch_in[2], 4);
                NE10_F2I16_FIXDIV (scratch_in[3], 4);
            }

            // radix 2 butterfly
            scratch[0].r = scratch_in[0].r + scratch_in[2].r;
            scratch[0].i = scratch_in[0].i + scratch_in[2].i;

            scratch[1].r = scratch_in[0].r - scratch_in[2].r;
            scratch[1].i = scratch_in[0].i - scratch_in[2].i;

            // radix 2 butterfly
            scratch[2].r = scratch_in[1].r + scratch_in[3].r;
            scratch[2].i = scratch_in[1].i + scratch_in[3].i;

            scratch[3].r = scratch_in[1].r - scratch_in[3].r;
            scratch[3].i = scratch_in[1].i - scratch_in[3].i;

            // third result
            scratch_out[2].r = scratch[0].r - scratch[2].r;
            scratch_out[2].i = scratch[0].i - scratch[2].i;

            // first result
            scratch_out[0].r = scratch[0].r + scratch[2].r;
            scratch_out[0].i = scratch[0].i + scratch[2].i;

            // second result (cross terms rotated by +j, inverse direction)
            scratch_out[1].r = scratch[1].r - scratch[3].i;
            scratch_out[1].i = scratch[1].i + scratch[3].r;

            // forth result
            scratch_out[3].r = scratch[1].r + scratch[3].i;
            scratch_out[3].i = scratch[1].i - scratch[3].r;

            // store
            * Fout1 ++ = scratch_out[0];
            * Fout1 ++ = scratch_out[1];
            * Fout1 ++ = scratch_out[2];
            * Fout1 ++ = scratch_out[3];

            Fin1++;
        } // f_count

        N = fstride; // 1/4 of length of FFT
        // update address for other stages
        stage_count--;
        fstride >>= 2;

        // swap
        Ftmp = buffer;
        buffer = Fout;
        Fout = Ftmp;

        // end of first stage
    }


    // others but the last one
    for (; stage_count > 1 ; stage_count--)
    {
        Fin1 = buffer;
        for (f_count = 0; f_count < fstride; f_count ++)
        {
            Fout1 = & Fout[ f_count * mstride << 2 ];
            tw1 = tw;
            for (m_count = mstride; m_count ; m_count --)
            {
                // load twiddles (mstride apart) and inputs (N apart)
                scratch_tw[0] = *tw1;
                tw2 = tw1 + mstride;
                scratch_tw[1] = *tw2;
                tw2 += mstride;
                scratch_tw[2] = *tw2;
                scratch_in[0] = * Fin1;
                Fin2 = Fin1 + N;
                scratch_in[1] = * Fin2;
                Fin2 += N;
                scratch_in[2] = * Fin2;
                Fin2 += N;
                scratch_in[3] = * Fin2;
                if (scaled_flag == 1)
                {
                    NE10_F2I16_FIXDIV (scratch_in[0], 4);
                    NE10_F2I16_FIXDIV (scratch_in[1], 4);
                    NE10_F2I16_FIXDIV (scratch_in[2], 4);
                    NE10_F2I16_FIXDIV (scratch_in[3], 4);
                }

                // radix 4 butterfly with twiddles
                // (conjugated Q15 complex multiply: signs of the imaginary
                // cross-terms are flipped relative to the forward kernel)

                scratch[0] = scratch_in[0];
                scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
                                                  + (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
                scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
                                                  - (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);

                scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
                                                  + (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
                scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
                                                  - (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);

                scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
                                                  + (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
                scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
                                                  - (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);

                // radix 2 butterfly
                scratch[4].r = scratch[0].r + scratch[2].r;
                scratch[4].i = scratch[0].i + scratch[2].i;

                scratch[5].r = scratch[0].r - scratch[2].r;
                scratch[5].i = scratch[0].i - scratch[2].i;

                // radix 2 butterfly
                scratch[6].r = scratch[1].r + scratch[3].r;
                scratch[6].i = scratch[1].i + scratch[3].i;

                scratch[7].r = scratch[1].r - scratch[3].r;
                scratch[7].i = scratch[1].i - scratch[3].i;

                // third result
                scratch_out[2].r = scratch[4].r - scratch[6].r;
                scratch_out[2].i = scratch[4].i - scratch[6].i;

                // first result
                scratch_out[0].r = scratch[4].r + scratch[6].r;
                scratch_out[0].i = scratch[4].i + scratch[6].i;

                // second result
                scratch_out[1].r = scratch[5].r - scratch[7].i;
                scratch_out[1].i = scratch[5].i + scratch[7].r;

                // forth result
                scratch_out[3].r = scratch[5].r + scratch[7].i;
                scratch_out[3].i = scratch[5].i - scratch[7].r;

                // store, mstride apart
                *Fout1 = scratch_out[0];
                Fout2 = Fout1 + mstride;
                *Fout2 = scratch_out[1];
                Fout2 += mstride;
                *Fout2 = scratch_out[2];
                Fout2 += mstride;
                *Fout2 = scratch_out[3];

                tw1++;
                Fin1 ++;
                Fout1 ++;
            } // m_count
        } // f_count
        tw += mstride * 3;
        mstride <<= 2;
        // swap
        Ftmp = buffer;
        buffer = Fout;
        Fout = Ftmp;
        fstride >>= 2;
    } // stage_count

    // the last one
    if (stage_count)
    {
        Fin1 = buffer;
        // if stage count is even, output to the input array
        Fout1 = Fout_ls;

        for (f_count = 0; f_count < fstride; f_count ++)
        {
            tw1 = tw;
            for (m_count = mstride; m_count ; m_count --)
            {
                // load
                scratch_tw[0] = *tw1;
                tw2 = tw1 + mstride;
                scratch_tw[1] = *tw2;
                tw2 += mstride;
                scratch_tw[2] = *tw2;
                scratch_in[0] = * Fin1;
                Fin2 = Fin1 + N;
                scratch_in[1] = * Fin2;
                Fin2 += N;
                scratch_in[2] = * Fin2;
                Fin2 += N;
                scratch_in[3] = * Fin2;
                if (scaled_flag == 1)
                {
                    NE10_F2I16_FIXDIV (scratch_in[0], 4);
                    NE10_F2I16_FIXDIV (scratch_in[1], 4);
                    NE10_F2I16_FIXDIV (scratch_in[2], 4);
                    NE10_F2I16_FIXDIV (scratch_in[3], 4);
                }

                // radix 4 butterfly with twiddles (conjugated, as above)

                scratch[0] = scratch_in[0];
                scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
                                                  + (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
                scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
                                                  - (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);

                scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
                                                  + (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
                scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
                                                  - (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);

                scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
                                                  + (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
                scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
                                                  - (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);

                // radix 2 butterfly
                scratch[4].r = scratch[0].r + scratch[2].r;
                scratch[4].i = scratch[0].i + scratch[2].i;

                scratch[5].r = scratch[0].r - scratch[2].r;
                scratch[5].i = scratch[0].i - scratch[2].i;

                // radix 2 butterfly
                scratch[6].r = scratch[1].r + scratch[3].r;
                scratch[6].i = scratch[1].i + scratch[3].i;

                scratch[7].r = scratch[1].r - scratch[3].r;
                scratch[7].i = scratch[1].i - scratch[3].i;

                // third result
                scratch_out[2].r = scratch[4].r - scratch[6].r;
                scratch_out[2].i = scratch[4].i - scratch[6].i;

                // first result
                scratch_out[0].r = scratch[4].r + scratch[6].r;
                scratch_out[0].i = scratch[4].i + scratch[6].i;

                // second result
                scratch_out[1].r = scratch[5].r - scratch[7].i;
                scratch_out[1].i = scratch[5].i + scratch[7].r;

                // forth result
                scratch_out[3].r = scratch[5].r + scratch[7].i;
                scratch_out[3].i = scratch[5].i - scratch[7].r;

                // store, N apart (final interleave)
                *Fout1 = scratch_out[0];
                Fout2 = Fout1 + N;
                *Fout2 = scratch_out[1];
                Fout2 += N;
                *Fout2 = scratch_out[2];
                Fout2 += N;
                *Fout2 = scratch_out[3];

                tw1 ++;
                Fin1 ++;
                Fout1 ++;
            } // m_count
        } // f_count
    } // last stage
}
923 
924 static void ne10_fft_split_r2c_1d_int16 (ne10_fft_cpx_int16_t *dst,
925  const ne10_fft_cpx_int16_t *src,
926  ne10_fft_cpx_int16_t *twiddles,
927  ne10_int32_t ncfft,
928  ne10_int32_t scaled_flag)
929 {
930  ne10_int32_t k;
931  ne10_fft_cpx_int16_t fpnk, fpk, f1k, f2k, tw, tdc;
932 
933  tdc.r = src[0].r;
934  tdc.i = src[0].i;
935 
936  if (scaled_flag)
937  NE10_F2I16_FIXDIV (tdc, 2);
938 
939  dst[0].r = tdc.r + tdc.i;
940  dst[ncfft].r = tdc.r - tdc.i;
941  dst[ncfft].i = dst[0].i = 0;
942 
943  for (k = 1; k <= ncfft / 2 ; ++k)
944  {
945  fpk = src[k];
946  fpnk.r = src[ncfft - k].r;
947  fpnk.i = - src[ncfft - k].i;
948  if (scaled_flag)
949  {
950  NE10_F2I16_FIXDIV (fpk, 2);
951  NE10_F2I16_FIXDIV (fpnk, 2);
952  }
953 
954  f1k.r = fpk.r + fpnk.r;
955  f1k.i = fpk.i + fpnk.i;
956 
957  f2k.r = fpk.r - fpnk.r;
958  f2k.i = fpk.i - fpnk.i;
959 
960  tw.r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) f2k.r * (twiddles[k - 1]).r
961  - (NE10_F2I16_SAMPPROD) f2k.i * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
962  tw.i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) f2k.r * (twiddles[k - 1]).i
963  + (NE10_F2I16_SAMPPROD) f2k.i * (twiddles[k - 1]).r) >> NE10_F2I16_SHIFT);
964 
965  dst[k].r = (f1k.r + tw.r) >> 1;
966  dst[k].i = (f1k.i + tw.i) >> 1;
967  dst[ncfft - k].r = (f1k.r - tw.r) >> 1;
968  dst[ncfft - k].i = (tw.i - f1k.i) >> 1;
969  }
970 }
971 
972 static void ne10_fft_split_c2r_1d_int16 (ne10_fft_cpx_int16_t *dst,
973  const ne10_fft_cpx_int16_t *src,
974  ne10_fft_cpx_int16_t *twiddles,
975  ne10_int32_t ncfft,
976  ne10_int32_t scaled_flag)
977 {
978 
979  ne10_int32_t k;
980  ne10_fft_cpx_int16_t fk, fnkc, fek, fok, tmp;
981 
982 
983  dst[0].r = src[0].r + src[ncfft].r;
984  dst[0].i = src[0].r - src[ncfft].r;
985 
986  if (scaled_flag)
987  NE10_F2I16_FIXDIV (dst[0], 2);
988 
989  for (k = 1; k <= ncfft / 2; k++)
990  {
991  fk = src[k];
992  fnkc.r = src[ncfft - k].r;
993  fnkc.i = -src[ncfft - k].i;
994  if (scaled_flag)
995  {
996  NE10_F2I16_FIXDIV (fk, 2);
997  NE10_F2I16_FIXDIV (fnkc, 2);
998  }
999 
1000  fek.r = fk.r + fnkc.r;
1001  fek.i = fk.i + fnkc.i;
1002 
1003  tmp.r = fk.r - fnkc.r;
1004  tmp.i = fk.i - fnkc.i;
1005 
1006  fok.r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) tmp.r * (twiddles[k - 1]).r
1007  + (NE10_F2I16_SAMPPROD) tmp.i * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
1008  fok.i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) tmp.i * (twiddles[k - 1]).r
1009  - (NE10_F2I16_SAMPPROD) tmp.r * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
1010 
1011  dst[k].r = fek.r + fok.r;
1012  dst[k].i = fek.i + fok.i;
1013 
1014  dst[ncfft - k].r = fek.r - fok.r;
1015  dst[ncfft - k].i = fok.i - fek.i;
1016  }
1017 }
1018 
1031 {
1032  ne10_fft_cfg_int16_t st = NULL;
1033  ne10_uint32_t memneeded = sizeof (ne10_fft_state_int16_t)
1034  + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
1035  + sizeof (ne10_fft_cpx_int16_t) * (nfft) /* twiddle */
1036  + sizeof (ne10_fft_cpx_int16_t) * nfft /* buffer */
1037  + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment*/
1038 
1039  st = (ne10_fft_cfg_int16_t) NE10_MALLOC (memneeded);
1040 
1041  if (st)
1042  {
1043  uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_state_int16_t);
1044  NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
1045  st->factors = (ne10_int32_t*) address;
1046  st->twiddles = (ne10_fft_cpx_int16_t*) (st->factors + (NE10_MAXFACTORS * 2));
1047  st->buffer = st->twiddles + nfft;
1048  st->nfft = nfft;
1049 
1050  ne10_int32_t result = ne10_factor (nfft, st->factors, NE10_FACTOR_DEFAULT);
1051  if (result == NE10_ERR)
1052  {
1053  NE10_FREE (st);
1054  return st;
1055  }
1056 
1057  ne10_int32_t i, j;
1058  ne10_int32_t *factors = st->factors;
1059  ne10_fft_cpx_int16_t *twiddles = st->twiddles;
1061  ne10_int32_t stage_count = factors[0];
1062  ne10_int32_t fstride1 = factors[1];
1063  ne10_int32_t fstride2 = fstride1 * 2;
1064  ne10_int32_t fstride3 = fstride1 * 3;
1065  ne10_int32_t m;
1066 
1067  const ne10_float32_t pi = NE10_PI;
1068  ne10_float32_t phase1;
1069  ne10_float32_t phase2;
1070  ne10_float32_t phase3;
1071 
1072  for (i = stage_count - 1; i > 0; i--)
1073  {
1074  fstride1 >>= 2;
1075  fstride2 >>= 2;
1076  fstride3 >>= 2;
1077  m = factors[2 * i + 1];
1078  tw = twiddles;
1079  for (j = 0; j < m; j++)
1080  {
1081  phase1 = -2 * pi * fstride1 * j / nfft;
1082  phase2 = -2 * pi * fstride2 * j / nfft;
1083  phase3 = -2 * pi * fstride3 * j / nfft;
1084  tw->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase1));
1085  tw->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase1));
1086  (tw + m)->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase2));
1087  (tw + m)->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase2));
1088  (tw + m * 2)->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase3));
1089  (tw + m * 2)->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase3));
1090  tw++;
1091  }
1092  twiddles += m * 3;
1093  }
1094 
1095  }
1096  return st;
1097 }
1098 
1111  ne10_fft_cpx_int16_t *fin,
1113  ne10_int32_t inverse_fft,
1114  ne10_int32_t scaled_flag)
1115 {
1116  if (inverse_fft)
1117  ne10_mixed_radix_butterfly_inverse_int16_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer, scaled_flag);
1118  else
1119  ne10_mixed_radix_butterfly_int16_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer, scaled_flag);
1120 }
1121 
1122 
1123 
1124  //end of C2C_FFT_IFFT group
1128 
1141 {
1142  ne10_fft_r2c_cfg_int16_t st = NULL;
1143  ne10_int32_t ncfft = nfft >> 1;
1144 
1145  ne10_uint32_t memneeded = sizeof (ne10_fft_r2c_state_int16_t)
1146  + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
1147  + sizeof (ne10_fft_cpx_int16_t) * ncfft /* twiddle*/
1148  + sizeof (ne10_fft_cpx_int16_t) * ncfft / 2 /* super twiddles*/
1149  + sizeof (ne10_fft_cpx_int32_t) * nfft /* buffer*/
1150  + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment*/
1151 
1152  st = (ne10_fft_r2c_cfg_int16_t) NE10_MALLOC (memneeded);
1153 
1154  if (st)
1155  {
1156  uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_r2c_state_int16_t);
1157  NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
1158  st->factors = (ne10_int32_t*) address;
1159  st->twiddles = (ne10_fft_cpx_int16_t*) (st->factors + (NE10_MAXFACTORS * 2));
1160  st->super_twiddles = st->twiddles + ncfft;
1161  st->buffer = st->super_twiddles + (ncfft / 2);
1162  st->ncfft = ncfft;
1163 
1164  ne10_int32_t result = ne10_factor (ncfft, st->factors, NE10_FACTOR_DEFAULT);
1165  if (result == NE10_ERR)
1166  {
1167  NE10_FREE (st);
1168  return st;
1169  }
1170 
1171  ne10_int32_t i, j;
1172  ne10_int32_t *factors = st->factors;
1173  ne10_fft_cpx_int16_t *twiddles = st->twiddles;
1175  ne10_int32_t stage_count = factors[0];
1176  ne10_int32_t fstride1 = factors[1];
1177  ne10_int32_t fstride2 = fstride1 * 2;
1178  ne10_int32_t fstride3 = fstride1 * 3;
1179  ne10_int32_t m;
1180 
1181  const ne10_float32_t pi = NE10_PI;
1182  ne10_float32_t phase1;
1183  ne10_float32_t phase2;
1184  ne10_float32_t phase3;
1185 
1186  for (i = stage_count - 1; i > 0; i--)
1187  {
1188  fstride1 >>= 2;
1189  fstride2 >>= 2;
1190  fstride3 >>= 2;
1191  m = factors[2 * i + 1];
1192  tw = twiddles;
1193  for (j = 0; j < m; j++)
1194  {
1195  phase1 = -2 * pi * fstride1 * j / ncfft;
1196  phase2 = -2 * pi * fstride2 * j / ncfft;
1197  phase3 = -2 * pi * fstride3 * j / ncfft;
1198  tw->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase1));
1199  tw->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase1));
1200  (tw + m)->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase2));
1201  (tw + m)->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase2));
1202  (tw + m * 2)->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase3));
1203  (tw + m * 2)->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase3));
1204  tw++;
1205  }
1206  twiddles += m * 3;
1207  }
1208 
1209  tw = st->super_twiddles;
1210  for (i = 0; i < ncfft / 2; i++)
1211  {
1212  phase1 = -pi * ( (ne10_float32_t) (i + 1) / ncfft + 0.5f);
1213  tw->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase1));
1214  tw->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase1));
1215  tw++;
1216  }
1217 
1218  }
1219  return st;
1220 }
1221 
1234  ne10_int16_t *fin,
1236  ne10_int32_t scaled_flag)
1237 {
1238  ne10_fft_cpx_int16_t * tmpbuf = cfg->buffer;
1239 
1240  ne10_mixed_radix_butterfly_int16_c (tmpbuf, (ne10_fft_cpx_int16_t*) fin, cfg->factors, cfg->twiddles, fout, scaled_flag);
1241  ne10_fft_split_r2c_1d_int16 (fout, tmpbuf, cfg->super_twiddles, cfg->ncfft, scaled_flag);
1242 }
1243 
1255 void ne10_fft_c2r_1d_int16_c (ne10_int16_t *fout,
1256  ne10_fft_cpx_int16_t *fin,
1258  ne10_int32_t scaled_flag)
1259 {
1260  ne10_fft_cpx_int16_t * tmpbuf1 = cfg->buffer;
1261  ne10_fft_cpx_int16_t * tmpbuf2 = cfg->buffer + cfg->ncfft;
1262 
1263  ne10_fft_split_c2r_1d_int16 (tmpbuf1, fin, cfg->super_twiddles, cfg->ncfft, scaled_flag);
1264  ne10_mixed_radix_butterfly_inverse_int16_c ( (ne10_fft_cpx_int16_t*) fout, tmpbuf1, cfg->factors, cfg->twiddles, tmpbuf2, scaled_flag);
1265 }
1266 
ne10_fft_r2c_1d_int16_c
void ne10_fft_r2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int16 data.
Definition: NE10_fft_int16.c:1233
ne10_fft_c2r_1d_int16_c
void ne10_fft_c2r_1d_int16_c(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int16 data.
Definition: NE10_fft_int16.c:1255
ne10_fft_c2c_1d_int16_c
void ne10_fft_c2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 16-bit fixed point data.
Definition: NE10_fft_int16.c:1110
ne10_fft_cpx_int32_t
structure for the 32 bits fixed point FFT function.
Definition: NE10_types.h:328
ne10_fft_state_int16_t
Definition: NE10_types.h:303
ne10_fft_alloc_r2c_int16
ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
Definition: NE10_fft_int16.c:1140
ne10_fft_r2c_state_int16_t
Definition: NE10_types.h:313
ne10_fft_alloc_c2c_int16
ne10_fft_cfg_int16_t ne10_fft_alloc_c2c_int16(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition: NE10_fft_int16.c:1030
ne10_fft_cpx_int16_t
structure for the 16 bits fixed point FFT function.
Definition: NE10_types.h:297