Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
test_suite_fft_int32.c
1 /*
2  * Copyright 2013-15 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : test_suite_fft_int32.c
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <math.h>
35 #include <string.h>
36 
37 #include "NE10_dsp.h"
38 #include "seatest.h"
39 #include "unit_test_common.h"
40 
41 
42 /* ----------------------------------------------------------------------
43 ** Global defines
44 ** ------------------------------------------------------------------- */
45 
/* Largest FFT length exercised; every data buffer holds twice this many
 * values so that it can store interleaved real and imaginary parts. */
#define TEST_LENGTH_SAMPLES     (32768)
/* Smallest FFT length used by the complex (c2c) tests. */
#define MIN_LENGTH_SAMPLES_CPX  (4)
/* Smallest FFT length used by the real (r2c/c2r) tests: twice the complex minimum. */
#define MIN_LENGTH_SAMPLES_REAL (2 * MIN_LENGTH_SAMPLES_CPX)

/* A conformance check fails when the SNR between C and NEON output is below this. */
#define SNR_THRESHOLD_INT32     (25.0f)

/* Each timing loop runs TEST_COUNT / fftSize iterations. */
#define TEST_COUNT              (250000)
54 
55 /* ----------------------------------------------------------------------
56 ** Defines each of the tests performed
57 ** ------------------------------------------------------------------- */
58 
59 //input and output
60 static ne10_int32_t testInput_i32_unscaled[TEST_LENGTH_SAMPLES * 2];
61 static ne10_int32_t testInput_i32_scaled[TEST_LENGTH_SAMPLES * 2];
62 static ne10_int32_t * guarded_in_c = NULL;
63 static ne10_int32_t * guarded_in_neon = NULL;
64 static ne10_int32_t * in_c = NULL;
65 static ne10_int32_t * in_neon = NULL;
66 
67 static ne10_int32_t * guarded_out_c = NULL;
68 static ne10_int32_t * guarded_out_neon = NULL;
69 static ne10_int32_t * out_c = NULL;
70 static ne10_int32_t * out_neon = NULL;
71 
72 static ne10_float32_t snr = 0.0f;
73 
74 static ne10_int64_t time_c = 0;
75 static ne10_int64_t time_neon = 0;
76 static ne10_float32_t time_speedup = 0.0f;
77 static ne10_float32_t time_savings = 0.0f;
78 
79 void test_fft_c2c_1d_int32_conformance()
80 {
81 
82  ne10_int32_t i = 0;
83  ne10_int32_t fftSize = 0;
85  ne10_fft_cfg_int32_t cfg_neon;
86  ne10_float32_t * out_c_tmp = NULL;
87  ne10_float32_t * out_neon_tmp = NULL;
88 
89  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
90 
91  /* init input memory */
92  guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
93  guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
94  in_c = guarded_in_c + ARRAY_GUARD_LEN;
95  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
96 
97  /* init dst memory */
98  guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
99  guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
100  out_c = guarded_out_c + ARRAY_GUARD_LEN;
101  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
102 
103  out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
104  out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
105 
106  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
107  {
108  testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
109  testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
110  }
111  for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
112  {
113  fprintf (stdout, "FFT size %d\n", fftSize);
114  /* FFT init */
115  cfg_c = ne10_fft_alloc_c2c_int32_c (fftSize);
116  if (cfg_c == NULL)
117  {
118  fprintf (stdout, "======ERROR, FFT alloc fails\n");
119  return;
120  }
121 
122  cfg_neon = ne10_fft_alloc_c2c_int32_neon (fftSize);
123  if (cfg_neon == NULL)
124  {
125  NE10_FREE (cfg_c);
126  fprintf (stdout, "======ERROR, FFT alloc fails\n");
127  return;
128  }
129 
130  /* unscaled FFT test */
131  memcpy (in_c, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
132  memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
133 
134  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
135  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
136  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 0);
137  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 0, 0);
138  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
139  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
140 
141  //conformance test
142  for (i = 0; i < fftSize * 2; i++)
143  {
144  out_c_tmp[i] = (ne10_float32_t) out_c[i];
145  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
146  }
147  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
148  assert_false ( (snr < SNR_THRESHOLD_INT32));
149 
150  /* IFFT test */
151  memcpy (in_c, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
152  memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
153 
154  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
155  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
156  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 0);
157  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 0);
158  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
159  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
160 
161  //conformance test
162  for (i = 0; i < fftSize * 2; i++)
163  {
164  out_c_tmp[i] = (ne10_float32_t) out_c[i];
165  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
166  }
167  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
168  assert_false ( (snr < SNR_THRESHOLD_INT32));
169 
170  /* scaled FFT test */
171  memcpy (in_c, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
172  memcpy (in_neon, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
173 
174  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
175  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
176  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 1);
177  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 0, 1);
178  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
179  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
180 
181  //conformance test
182  for (i = 0; i < fftSize * 2; i++)
183  {
184  out_c_tmp[i] = (ne10_float32_t) out_c[i];
185  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
186  }
187  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
188  assert_false ( (snr < SNR_THRESHOLD_INT32));
189 
190  /* IFFT test */
191  memcpy (in_c, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
192  memcpy (in_neon, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
193 
194  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
195  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
196  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 1);
197  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 1);
198  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
199  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
200 
201  //conformance test
202  for (i = 0; i < fftSize * 2; i++)
203  {
204  out_c_tmp[i] = (ne10_float32_t) out_c[i];
205  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
206  }
207  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
208  assert_false ( (snr < SNR_THRESHOLD_INT32));
209 
210  NE10_FREE (cfg_c);
211  NE10_FREE (cfg_neon);
212  }
213 
214  NE10_FREE (guarded_in_c);
215  NE10_FREE (guarded_in_neon);
216  NE10_FREE (guarded_out_c);
217  NE10_FREE (guarded_out_neon);
218  NE10_FREE (out_c_tmp);
219  NE10_FREE (out_neon_tmp);
220 }
221 
222 void test_fft_c2c_1d_int32_performance()
223 {
224 
225  ne10_int32_t i = 0;
226  ne10_int32_t fftSize = 0;
227  ne10_fft_cfg_int32_t cfg_c;
228  ne10_fft_cfg_int32_t cfg_neon;
229  ne10_int32_t test_loop = 0;
230 
231  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
232  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
233 
234  /* init input memory */
235  guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
236  guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
237  in_c = guarded_in_c + ARRAY_GUARD_LEN;
238  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
239 
240  /* init dst memory */
241  guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
242  guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
243  out_c = guarded_out_c + ARRAY_GUARD_LEN;
244  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
245 
246  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
247  {
248  testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
249  testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
250  }
251  for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
252  {
253  fprintf (stdout, "FFT size %d\n", fftSize);
254 
255  /* FFT test */
256  memcpy (in_c, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
257  memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
258  cfg_c = ne10_fft_alloc_c2c_int32_c (fftSize);
259  if (cfg_c == NULL)
260  {
261  fprintf (stdout, "======ERROR, FFT alloc fails\n");
262  return;
263  }
264 
265  cfg_neon = ne10_fft_alloc_c2c_int32_neon (fftSize);
266  if (cfg_neon == NULL)
267  {
268  NE10_FREE (cfg_c);
269  fprintf (stdout, "======ERROR, FFT alloc fails\n");
270  return;
271  }
272 
273  test_loop = TEST_COUNT / fftSize;
274 
275  GET_TIME
276  (
277  time_c,
278  {
279  for (i = 0; i < test_loop; i++)
280  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 0);
281  }
282  );
283  GET_TIME
284  (
285  time_neon,
286  {
287  for (i = 0; i < test_loop; i++)
288  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_neon, 0, 0);
289  }
290  );
291  time_speedup = (ne10_float32_t) time_c / time_neon;
292  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
293  ne10_log (__FUNCTION__, " unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
294 
295  /* IFFT test */
296  memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_int32_t));
297  memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_int32_t));
298 
299  GET_TIME
300  (
301  time_c,
302  {
303  for (i = 0; i < test_loop; i++)
304  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 0);
305  }
306  );
307  GET_TIME
308  (
309  time_neon,
310  {
311  for (i = 0; i < test_loop; i++)
312  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 0);
313  }
314  );
315 
316  time_speedup = (ne10_float32_t) time_c / time_neon;
317  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
318  ne10_log (__FUNCTION__, "unscaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
319 
320  /* FFT test */
321  memcpy (in_c, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
322  memcpy (in_neon, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
323 
324  GET_TIME
325  (
326  time_c,
327  {
328  for (i = 0; i < test_loop; i++)
329  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 1);
330  }
331  );
332  GET_TIME
333  (
334  time_neon,
335  {
336  for (i = 0; i < test_loop; i++)
337  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_neon, 0, 1);
338  }
339  );
340  time_speedup = (ne10_float32_t) time_c / time_neon;
341  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
342  ne10_log (__FUNCTION__, " scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
343 
344  /* IFFT test */
345  memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_int32_t));
346  memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_int32_t));
347 
348  GET_TIME
349  (
350  time_c,
351  {
352  for (i = 0; i < test_loop; i++)
353  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 1);
354  }
355  );
356  GET_TIME
357  (
358  time_neon,
359  {
360  for (i = 0; i < test_loop; i++)
361  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 1);
362  }
363  );
364 
365  time_speedup = (ne10_float32_t) time_c / time_neon;
366  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
367  ne10_log (__FUNCTION__, " scaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
368 
369  NE10_FREE (cfg_c);
370  NE10_FREE (cfg_neon);
371  }
372 
373  NE10_FREE (guarded_in_c);
374  NE10_FREE (guarded_in_neon);
375  NE10_FREE (guarded_out_c);
376  NE10_FREE (guarded_out_neon);
377 }
378 
379 void test_fft_r2c_1d_int32_conformance()
380 {
381 
382  ne10_int32_t i = 0;
383  ne10_int32_t fftSize = 0;
385  ne10_float32_t * out_c_tmp = NULL;
386  ne10_float32_t * out_neon_tmp = NULL;
387 
388  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
389 
390  /* init input memory */
391  guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
392  guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
393  in_c = guarded_in_c + ARRAY_GUARD_LEN;
394  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
395 
396  /* init dst memory */
397  guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
398  guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
399  out_c = guarded_out_c + ARRAY_GUARD_LEN;
400  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
401 
402  out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
403  out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
404 
405  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
406  {
407  testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
408  testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
409  }
410  for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
411  {
412  fprintf (stdout, "FFT size %d\n", fftSize);
413  /* FFT init */
414  cfg = ne10_fft_alloc_r2c_int32 (fftSize);
415  if (cfg == NULL)
416  {
417  fprintf (stdout, "======ERROR, FFT alloc fails\n");
418  return;
419  }
420 
421  /* unscaled FFT test */
422  memcpy (in_c, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
423  memcpy (in_neon, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
424 
425  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
426  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
427 
428  ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 0);
429  ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 0);
430 
431  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
432  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
433 
434  //conformance test
435  for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
436  {
437  out_c_tmp[i] = (ne10_float32_t) out_c[i];
438  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
439  }
440  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
441  assert_false ( (snr < SNR_THRESHOLD_INT32));
442 
443  /* IFFT test */
444  for (i = 1; i < (fftSize / 2); i++)
445  {
446  in_c[2 * i] = testInput_i32_unscaled[2 * i];
447  in_c[2 * i + 1] = testInput_i32_unscaled[2 * i + 1];
448  in_c[2 * (fftSize - i)] = in_c[2 * i];
449  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
450  }
451  in_c[0] = testInput_i32_unscaled[0];
452  in_c[1] = 0;
453  in_c[fftSize] = testInput_i32_unscaled[1];
454  in_c[fftSize + 1] = 0;
455  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
456 
457  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t));
458  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t));
459 
460  ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0);
461  ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 0);
462 
463  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t));
464  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t));
465 
466  //conformance test
467  for (i = 0; i < fftSize; i++)
468  {
469  out_c_tmp[i] = (ne10_float32_t) out_c[i];
470  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
471  }
472  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
473  assert_false ( (snr < SNR_THRESHOLD_INT32));
474 
475  /* scaled FFT test */
476  memcpy (in_c, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
477  memcpy (in_neon, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
478 
479  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
480  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
481 
482  ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 1);
483  ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 1);
484 
485  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
486  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
487 
488  //conformance test
489  for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
490  {
491  out_c_tmp[i] = (ne10_float32_t) out_c[i];
492  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
493  }
494  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
495  assert_false ( (snr < SNR_THRESHOLD_INT32));
496 
497  /* IFFT test */
498  for (i = 1; i < (fftSize / 2); i++)
499  {
500  in_c[2 * i] = testInput_i32_scaled[2 * i];
501  in_c[2 * i + 1] = testInput_i32_scaled[2 * i + 1];
502  in_c[2 * (fftSize - i)] = in_c[2 * i];
503  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
504  }
505  in_c[0] = testInput_i32_scaled[0];
506  in_c[1] = 0;
507  in_c[fftSize] = testInput_i32_scaled[1];
508  in_c[fftSize + 1] = 0;
509  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
510 
511  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t));
512  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t));
513 
514  ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 1);
515  ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 1);
516 
517  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t));
518  CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t));
519 
520  //conformance test
521  for (i = 0; i < fftSize; i++)
522  {
523  out_c_tmp[i] = (ne10_float32_t) out_c[i];
524  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
525  }
526  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
527  assert_false ( (snr < SNR_THRESHOLD_INT32));
528 
529 
530  NE10_FREE (cfg);
531  }
532 
533  NE10_FREE (guarded_in_c);
534  NE10_FREE (guarded_in_neon);
535  NE10_FREE (guarded_out_c);
536  NE10_FREE (guarded_out_neon);
537  NE10_FREE (out_c_tmp);
538  NE10_FREE (out_neon_tmp);
539 }
540 
541 void test_fft_r2c_1d_int32_performance()
542 {
543 
544  ne10_int32_t i = 0;
545  ne10_int32_t fftSize = 0;
547  ne10_int32_t test_loop = 0;
548 
549  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
550  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
551 
552  /* init input memory */
553  guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
554  guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
555  in_c = guarded_in_c + ARRAY_GUARD_LEN;
556  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
557 
558  /* init dst memory */
559  guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
560  guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
561  out_c = guarded_out_c + ARRAY_GUARD_LEN;
562  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
563 
564  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
565  {
566  testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
567  testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
568  }
569  for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
570  {
571  fprintf (stdout, "FFT size %d\n", fftSize);
572 
573  cfg = ne10_fft_alloc_r2c_int32 (fftSize);
574  if (cfg == NULL)
575  {
576  fprintf (stdout, "======ERROR, FFT alloc fails\n");
577  return;
578  }
579  test_loop = TEST_COUNT / fftSize;
580  /* unscaled FFT test */
581  memcpy (in_c, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
582  memcpy (in_neon, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
583 
584  GET_TIME
585  (
586  time_c,
587  {
588  for (i = 0; i < test_loop; i++)
589  ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 0);
590  }
591  );
592  GET_TIME
593  (
594  time_neon,
595  {
596  for (i = 0; i < test_loop; i++)
597  ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 0);
598  }
599  );
600 
601  time_speedup = (ne10_float32_t) time_c / time_neon;
602  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
603  ne10_log (__FUNCTION__, "Int32 unscaled RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
604 
605  /* IFFT test */
606  for (i = 1; i < (fftSize / 2); i++)
607  {
608  in_c[2 * i] = testInput_i32_unscaled[2 * i];
609  in_c[2 * i + 1] = testInput_i32_unscaled[2 * i + 1];
610  in_c[2 * (fftSize - i)] = in_c[2 * i];
611  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
612  }
613  in_c[0] = testInput_i32_unscaled[0];
614  in_c[1] = 0;
615  in_c[fftSize] = testInput_i32_unscaled[1];
616  in_c[fftSize + 1] = 0;
617  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
618 
619  GET_TIME
620  (
621  time_c,
622  {
623  for (i = 0; i < test_loop; i++)
624  ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0);
625  }
626  );
627  GET_TIME
628  (
629  time_neon,
630  {
631  for (i = 0; i < test_loop; i++)
632  ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 0);
633  }
634  );
635 
636  time_speedup = (ne10_float32_t) time_c / time_neon;
637  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
638  ne10_log (__FUNCTION__, "Int32 unscaled RIFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
639 
640  /* scaled FFT test */
641  memcpy (in_c, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
642  memcpy (in_neon, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
643 
644  GET_TIME
645  (
646  time_c,
647  {
648  for (i = 0; i < test_loop; i++)
649  ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 1);
650  }
651  );
652  GET_TIME
653  (
654  time_neon,
655  {
656  for (i = 0; i < test_loop; i++)
657  ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 1);
658  }
659  );
660 
661  time_speedup = (ne10_float32_t) time_c / time_neon;
662  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
663  ne10_log (__FUNCTION__, "Int32 scaled RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
664 
665  /* IFFT test */
666  for (i = 1; i < (fftSize / 2); i++)
667  {
668  in_c[2 * i] = testInput_i32_scaled[2 * i];
669  in_c[2 * i + 1] = testInput_i32_scaled[2 * i + 1];
670  in_c[2 * (fftSize - i)] = in_c[2 * i];
671  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
672  }
673  in_c[0] = testInput_i32_scaled[0];
674  in_c[1] = 0;
675  in_c[fftSize] = testInput_i32_scaled[1];
676  in_c[fftSize + 1] = 0;
677  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
678 
679  GET_TIME
680  (
681  time_c,
682  {
683  for (i = 0; i < test_loop; i++)
684  ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 1);
685  }
686  );
687  GET_TIME
688  (
689  time_neon,
690  {
691  for (i = 0; i < test_loop; i++)
692  ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 1);
693  }
694  );
695 
696  time_speedup = (ne10_float32_t) time_c / time_neon;
697  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
698  ne10_log (__FUNCTION__, "Int32 scaled RIFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
699 
700  NE10_FREE (cfg);
701  }
702 
703  NE10_FREE (guarded_in_c);
704  NE10_FREE (guarded_in_neon);
705  NE10_FREE (guarded_out_c);
706  NE10_FREE (guarded_out_neon);
707 }
708 
/**
 * Entry point for the int32 c2c FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined,
 * and the performance test when PERFORMANCE_TEST is defined. Both macros in
 * the first condition are tested with `defined`, so the selection depends
 * only on whether the macro exists, not on its value.
 */
void test_fft_c2c_1d_int32()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_int32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_int32_performance();
#endif
}
719 
/**
 * Entry point for the int32 r2c/c2r FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined,
 * and the performance test when PERFORMANCE_TEST is defined. Both macros in
 * the first condition are tested with `defined`, so the selection depends
 * only on whether the macro exists, not on its value.
 */
void test_fft_r2c_1d_int32()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_int32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_int32_performance();
#endif
}
730 
731 static void my_test_setup (void)
732 {
733  ne10_log_buffer_ptr = ne10_log_buffer;
734 }
735 
736 void test_fixture_fft_c2c_1d_int32 (void)
737 {
738  test_fixture_start(); // starts a fixture
739 
740  fixture_setup (my_test_setup);
741 
742  run_test (test_fft_c2c_1d_int32); // run tests
743 
744  test_fixture_end(); // ends a fixture
745 }
746 
747 void test_fixture_fft_r2c_1d_int32 (void)
748 {
749  test_fixture_start(); // starts a fixture
750 
751  fixture_setup (my_test_setup);
752 
753  run_test (test_fft_r2c_1d_int32); // run tests
754 
755  test_fixture_end(); // ends a fixture
756 }
ne10_fft_c2r_1d_int32_neon
void ne10_fft_c2r_1d_int32_neon(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int32 data.
Definition: NE10_fft_int32.neonintrinsic.c:1905
ne10_fft_r2c_1d_int32_c
void ne10_fft_r2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int32 data.
Definition: NE10_fft_int32.c:1219
ne10_fft_state_int32_t
Definition: NE10_types.h:334
ne10_fft_alloc_c2c_int32_neon
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition: NE10_fft.c:435
ne10_fft_cpx_int32_t
structure for the 32 bits fixed point FFT function.
Definition: NE10_types.h:328
ne10_fft_c2c_1d_int32_c
void ne10_fft_c2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 32-bit fixed point data.
Definition: NE10_fft_int32.c:1072
ne10_fft_r2c_1d_int32_neon
void ne10_fft_r2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int32 data.
Definition: NE10_fft_int32.neonintrinsic.c:1876
ne10_fft_alloc_r2c_int32
ne10_fft_r2c_cfg_int32_t ne10_fft_alloc_r2c_int32(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
Definition: NE10_fft_int32.c:1125
ne10_fft_r2c_state_int32_t
Definition: NE10_types.h:345
ne10_fft_alloc_c2c_int32_c
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition: NE10_fft_int32.c:1027
ne10_fft_c2r_1d_int32_c
void ne10_fft_c2r_1d_int32_c(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int32 data.
Definition: NE10_fft_int32.c:1241
ne10_fft_c2c_1d_int32_neon
void ne10_fft_c2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 32-bit fixed point data.
Definition: NE10_fft_int32.neonintrinsic.c:1739