Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
test_suite_fft_float32.c
1 /*
2  * Copyright 2013-15 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : test_suite_fft_float32.c
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <math.h>
35 #include <string.h>
36 
37 #include "NE10_dsp.h"
38 #include "NE10_macros.h"
39 #include "seatest.h"
40 #include "unit_test_common.h"
41 
42 
43 /* ----------------------------------------------------------------------
44 ** Global defines
45 ** ------------------------------------------------------------------- */
46 
47 /* Max FFT Length and double buffer for real and imag */
48 #define TEST_LENGTH_SAMPLES (32768)
49 #define MIN_LENGTH_SAMPLES_CPX (4)
50 #define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX*2)
51 
52 #define TEST_COUNT 10000000
53 
54 /* ----------------------------------------------------------------------
55 ** Test input data for F32
56 ** Generated by the MATLAB rand() function
57 ** ------------------------------------------------------------------- */
58 
59 static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES * 2];
60 
61 /* ----------------------------------------------------------------------
62 ** Defines each of the tests performed
63 ** ------------------------------------------------------------------- */
64 
65 //input and output
66 static ne10_float32_t * guarded_in_c = NULL;
67 static ne10_float32_t * guarded_in_neon = NULL;
68 static ne10_float32_t * in_c = NULL;
69 static ne10_float32_t * in_neon = NULL;
70 
71 static ne10_float32_t * guarded_out_c = NULL;
72 static ne10_float32_t * guarded_out_neon = NULL;
73 static ne10_float32_t * out_c = NULL;
74 static ne10_float32_t * out_neon = NULL;
75 
76 static ne10_float32_t snr = 0.0f;
77 
78 static ne10_int64_t time_c = 0;
79 static ne10_int64_t time_neon = 0;
80 static ne10_float32_t time_speedup = 0.0f;
81 static ne10_float32_t time_savings = 0.0f;
82 
83 static ne10_fft_cfg_float32_t cfg_c;
84 static ne10_fft_cfg_float32_t cfg_neon;
85 
86 static ne10_int32_t test_c2c_alloc (ne10_int32_t fftSize);
87 
88 void test_fft_c2c_1d_float32_conformance()
89 {
90  ne10_int32_t fftSize = 0;
91  ne10_int32_t flag_result = NE10_OK;
92 
93  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
94 
95  for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
96  {
97  fprintf (stdout, "FFT size %d\n", fftSize);
98  flag_result = test_c2c_alloc (fftSize);
99  if (flag_result == NE10_ERR)
100  {
101  return;
102  }
103 
104  /* FFT test */
105  memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
106  memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
107 
108  GUARD_ARRAY (out_c, fftSize * 2);
109  GUARD_ARRAY (out_neon, fftSize * 2);
110 
112  ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 0);
113 
114  CHECK_ARRAY_GUARD (out_c, fftSize * 2);
115  CHECK_ARRAY_GUARD (out_neon, fftSize * 2);
116 
117  //conformance test
118  snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize * 2);
119  assert_false ( (snr < SNR_THRESHOLD));
120 
121  /* IFFT test */
122  memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
123  memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
124 
125  GUARD_ARRAY (out_c, fftSize * 2);
126  GUARD_ARRAY (out_neon, fftSize * 2);
127 
129  ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 1);
130 
131  CHECK_ARRAY_GUARD (out_c, fftSize * 2);
132  CHECK_ARRAY_GUARD (out_neon, fftSize * 2);
133 
134  //conformance test
135  snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize * 2);
136  assert_false ( (snr < SNR_THRESHOLD));
137 
138  NE10_FREE (cfg_c);
139  NE10_FREE (cfg_neon);
140  }
141 }
142 
143 void test_fft_c2c_1d_float32_performance()
144 {
145  ne10_int32_t i = 0;
146  ne10_int32_t fftSize = 0;
147  ne10_int32_t flag_result = NE10_OK;
148  ne10_int32_t test_loop = 0;
149 
150  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
151  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
152 
153  for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
154  {
155  fprintf (stdout, "FFT size %d\n", fftSize);
156 
157  /* FFT test */
158  memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
159  memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
160  flag_result = test_c2c_alloc (fftSize);
161  if (flag_result == NE10_ERR)
162  {
163  return;
164  }
165 
166  test_loop = TEST_COUNT / fftSize;
167 
168  GET_TIME
169  (
170  time_c,
171  {
172  for (i = 0; i < test_loop; i++)
174  }
175  );
176  GET_TIME
177  (
178  time_neon,
179  {
180  for (i = 0; i < test_loop; i++)
181  ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 0);
182  }
183  );
184 
185  time_speedup = (ne10_float32_t) time_c / time_neon;
186  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
187  ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
188 
189  /* IFFT test */
190  memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_float32_t));
191  memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_float32_t));
192 
193  GET_TIME
194  (
195  time_c,
196  {
197  for (i = 0; i < test_loop; i++)
199  }
200  );
201  GET_TIME
202  (
203  time_neon,
204  {
205  for (i = 0; i < test_loop; i++)
206  ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 1);
207  }
208  );
209 
210  time_speedup = (ne10_float32_t) time_c / time_neon;
211  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
212  ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
213 
214  NE10_FREE (cfg_c);
215  NE10_FREE (cfg_neon);
216  }
217 }
218 
219 void test_fft_r2c_1d_float32_conformance()
220 {
221 
222  ne10_int32_t i = 0;
223  ne10_int32_t fftSize = 0;
225 
226  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
227 
228  for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
229  {
230  fprintf (stdout, "FFT size %d\n", fftSize);
231 
232  /* FFT test */
233  memcpy (in_c, testInput_f32, fftSize * sizeof (ne10_float32_t));
234  memcpy (in_neon, testInput_f32, fftSize * sizeof (ne10_float32_t));
235  cfg = ne10_fft_alloc_r2c_float32 (fftSize);
236  if (cfg == NULL)
237  {
238  fprintf (stdout, "======ERROR, FFT alloc fails\n");
239  return;
240  }
241 
242  GUARD_ARRAY (out_c, (fftSize / 2 + 1) * 2);
243  GUARD_ARRAY (out_neon, (fftSize / 2 + 1) * 2);
244 
245  ne10_fft_r2c_1d_float32_c ( (ne10_fft_cpx_float32_t*) out_c, in_c, cfg);
246  ne10_fft_r2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, in_neon, cfg);
247 
248  CHECK_ARRAY_GUARD (out_c, (fftSize / 2 + 1) * 2);
249  CHECK_ARRAY_GUARD (out_neon, (fftSize / 2 + 1) * 2);
250 
251  //conformance test
252  snr = CAL_SNR_FLOAT32 (out_c, out_neon, (fftSize / 2 + 1) * 2);
253  assert_false ( (snr < SNR_THRESHOLD));
254 
255  /* IFFT test */
256  for (i = 1; i < (fftSize / 2); i++)
257  {
258  in_c[2 * i] = testInput_f32[2 * i];
259  in_c[2 * i + 1] = testInput_f32[2 * i + 1];
260  in_c[2 * (fftSize - i)] = in_c[2 * i];
261  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
262  }
263  in_c[0] = testInput_f32[0];
264  in_c[1] = 0;
265  in_c[fftSize] = testInput_f32[1];
266  in_c[fftSize + 1] = 0;
267  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_float32_t));
268 
269  GUARD_ARRAY (out_c, fftSize);
270  GUARD_ARRAY (out_neon, fftSize);
271 
273  ne10_fft_c2r_1d_float32_neon (out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg);
274 
275  CHECK_ARRAY_GUARD (out_c, fftSize);
276  CHECK_ARRAY_GUARD (out_neon, fftSize);
277 
278  //conformance test
279  snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize);
280  assert_false ( (snr < SNR_THRESHOLD));
281 
282  NE10_FREE (cfg);
283  }
284 }
285 
286 void test_fft_r2c_1d_float32_performance()
287 {
288 
289  ne10_int32_t i = 0;
290  ne10_int32_t fftSize = 0;
292  ne10_int32_t test_loop = 0;
293 
294  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
295  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
296 
297  for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
298  {
299  fprintf (stdout, "FFT size %d\n", fftSize);
300 
301  /* FFT test */
302  memcpy (in_c, testInput_f32, fftSize * sizeof (ne10_float32_t));
303  memcpy (in_neon, testInput_f32, fftSize * sizeof (ne10_float32_t));
304  cfg = ne10_fft_alloc_r2c_float32 (fftSize);
305  if (cfg == NULL)
306  {
307  fprintf (stdout, "======ERROR, FFT alloc fails\n");
308  return;
309  }
310  test_loop = TEST_COUNT / fftSize;
311 
312  GET_TIME
313  (
314  time_c,
315  {
316  for (i = 0; i < test_loop; i++)
317  ne10_fft_r2c_1d_float32_c ( (ne10_fft_cpx_float32_t*) out_c, in_c, cfg);
318  }
319  );
320  GET_TIME
321  (
322  time_neon,
323  {
324  for (i = 0; i < test_loop; i++)
325  ne10_fft_r2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, in_neon, cfg);
326  }
327  );
328 
329  time_speedup = (ne10_float32_t) time_c / time_neon;
330  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
331  ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
332 
333  /* IFFT test */
334  for (i = 1; i < (fftSize / 2); i++)
335  {
336  in_c[2 * i] = testInput_f32[2 * i];
337  in_c[2 * i + 1] = testInput_f32[2 * i + 1];
338  in_c[2 * (fftSize - i)] = in_c[2 * i];
339  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
340  }
341  in_c[0] = testInput_f32[0];
342  in_c[1] = 0;
343  in_c[fftSize] = testInput_f32[1];
344  in_c[fftSize + 1] = 0;
345  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_float32_t));
346 
347  GET_TIME
348  (
349  time_c,
350  {
351  for (i = 0; i < test_loop; i++)
353  }
354  );
355  GET_TIME
356  (
357  time_neon,
358  {
359  for (i = 0; i < test_loop; i++)
360  ne10_fft_c2r_1d_float32_neon (out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg);
361  }
362  );
363 
364  time_speedup = (ne10_float32_t) time_c / time_neon;
365  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
366  ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
367 
368  NE10_FREE (cfg);
369  }
370 }
371 
372 static void my_test_setup (void)
373 {
374  ne10_log_buffer_ptr = ne10_log_buffer;
375  ne10_int32_t i;
376 
377  /* init input memory */
378  guarded_in_c = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
379  guarded_in_neon = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
380  in_c = guarded_in_c + ARRAY_GUARD_LEN;
381  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
382 
383  /* init dst memory */
384  guarded_out_c = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
385  guarded_out_neon = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
386  out_c = guarded_out_c + ARRAY_GUARD_LEN;
387  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
388 
389  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
390  {
391  testInput_f32[i] = (ne10_float32_t) (drand48() * 32768.0f - 16384.0f);
392  }
393 }
394 
395 static void my_test_teardown (void)
396 {
397  NE10_FREE (guarded_in_c);
398  NE10_FREE (guarded_in_neon);
399  NE10_FREE (guarded_out_c);
400  NE10_FREE (guarded_out_neon);
401 }
402 
/*
 * Entry point for the complex-to-complex float32 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined and
 * the performance test when PERFORMANCE_TEST is defined. Both macros are
 * tested with `defined` so that a value-less definition of REGRESSION_TEST
 * does not produce an invalid #if expression.
 */
void test_fft_c2c_1d_float32()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_float32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_float32_performance();
#endif
}
413 
/*
 * Entry point for the real-to-complex / complex-to-real float32 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined and
 * the performance test when PERFORMANCE_TEST is defined. Both macros are
 * tested with `defined` so that a value-less definition of REGRESSION_TEST
 * does not produce an invalid #if expression.
 */
void test_fft_r2c_1d_float32()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_float32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_float32_performance();
#endif
}
424 
425 void test_fixture_fft_c2c_1d_float32 (void)
426 {
427  test_fixture_start(); // starts a fixture
428 
429  fixture_setup (my_test_setup);
430 
431  run_test (test_fft_c2c_1d_float32); // run tests
432 
433  fixture_teardown(my_test_teardown);
434 
435  test_fixture_end(); // ends a fixture
436 }
437 
438 void test_fixture_fft_r2c_1d_float32 (void)
439 {
440  test_fixture_start(); // starts a fixture
441 
442  fixture_setup (my_test_setup);
443 
444  run_test (test_fft_r2c_1d_float32); // run tests
445 
446  fixture_teardown(my_test_teardown);
447 
448  test_fixture_end(); // ends a fixture
449 }
450 
451 ne10_int32_t test_c2c_alloc (ne10_int32_t fftSize)
452 {
453  NE10_FREE (cfg_c);
454  NE10_FREE (cfg_neon);
455 
456  cfg_c = ne10_fft_alloc_c2c_float32_c (fftSize);
457  if (cfg_c == NULL)
458  {
459  fprintf (stdout, "======ERROR, FFT alloc fails\n");
460  return NE10_ERR;
461  }
462 
463  cfg_neon = ne10_fft_alloc_c2c_float32_neon (fftSize);
464  if (cfg_neon == NULL)
465  {
466  NE10_FREE (cfg_c);
467  fprintf (stdout, "======ERROR, FFT alloc fails\n");
468  return NE10_ERR;
469  }
470  return NE10_OK;
471 }
ne10_fft_state_float32_t
structure for the floating point FFT state
Definition: NE10_types.h:240
ne10_fft_c2r_1d_float32_c
void ne10_fft_c2r_1d_float32_c(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 IFFT (complex to real) of float(32-bit) data.
Definition: NE10_fft_float32.c:1305
ne10_fft_c2c_1d_float32_neon
void ne10_fft_c2c_1d_float32_neon(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Mixed radix-2/3/4/5 complex FFT/IFFT of float(32-bit) data.
Definition: NE10_fft_float32.neonintrinsic.c:1459
ne10_fft_alloc_r2c_float32
ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
Definition: NE10_fft_float32.c:1193
ne10_fft_alloc_c2c_float32_neon
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_neon(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition: NE10_fft.c:337
ne10_fft_cpx_float32_t
Definition: NE10_types.h:230
ne10_fft_alloc_c2c_float32_c
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition: NE10_fft_float32.c:997
ne10_fft_c2c_1d_float32_c
void ne10_fft_c2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Mixed radix-2/3/4/5 complex FFT/IFFT of float(32-bit) data.
Definition: NE10_fft_float32.c:1065
ne10_fft_r2c_state_float32_t
Definition: NE10_types.h:272
ne10_fft_c2r_1d_float32_neon
void ne10_fft_c2r_1d_float32_neon(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 IFFT (complex to real) of float(32-bit) data.
Definition: NE10_rfft_float32.neonintrinsic.c:1752
ne10_fft_r2c_1d_float32_c
void ne10_fft_r2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 FFT (real to complex) of float(32-bit) data.
Definition: NE10_fft_float32.c:1285
ne10_fft_r2c_1d_float32_neon
void ne10_fft_r2c_1d_float32_neon(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 FFT (real to complex) of float(32-bit) data.
Definition: NE10_rfft_float32.neonintrinsic.c:1717