36 #include "NE10_physics.h"
38 #include "unit_test_common.h"
/* Element count used to size and fill the test buffers; also the exclusive upper bound of the per-test count loops. */
43 #define TEST_LENGTH_SAMPLES 1024
/* Number of repeated calls made inside each performance timing loop. */
44 #define TEST_COUNT 5000
/* Timings printed by the performance tests as "time C" and "time NEON". */
46 static ne10_int64_t time_c = 0;
47 static ne10_int64_t time_neon = 0;
/* Set by the performance tests to time_c / time_neon (computed in float). */
48 static ne10_float32_t time_speedup = 0.0f;
/* Set by the performance tests to (time_c - time_neon) / time_c * 100. */
49 static ne10_float32_t time_savings = 0.0f;
/*
 * Fill array[0 .. len-1] with pseudo-random floats.
 *
 * array: destination buffer, written element by element.
 * len:   number of ne10_float32_t elements to write.
 *
 * Each element is drand48() * 32768.0f - 16384.0f cast to ne10_float32_t.
 * POSIX documents drand48() as returning values in [0.0, 1.0), so the
 * results should fall in roughly [-16384, 16384).
 */
51 static void float_array_assignment (ne10_float32_t *array, ne10_int32_t len)
54 for (i = 0; i < len; i++)
56 array[i] = (ne10_float32_t) (drand48() * 32768.0f - 16384.0f);
/*
 * Conformance test for ne10_physics_compute_aabb_vec2f_neon.
 *
 * For a series of vertex counts the NEON routine writes aabb_neon, which is
 * then compared against aabb_c with assert_float_vec_equal using
 * ERROR_MARGIN_LARGE.
 * NOTE(review): aabb_c is presumably produced by the C reference
 * implementation on vertices_c with the same xf/radius - confirm.
 */
61 void test_compute_aabb_vec2f_conformance()
/* The test body is only compiled when the NEON variant is enabled. */
63 #if defined ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
66 ne10_mat2x2f_t aabb_c, aabb_neon;
68 ne10_int32_t vertex_count;
/* Number of floats in one ne10_mat2x2f_t; used as the comparison length. */
69 ne10_int32_t vec_size =
sizeof (ne10_mat2x2f_t) /
sizeof (ne10_float32_t);
71 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Randomise TEST_LENGTH_SAMPLES vec2f vertices (length given in floats)... */
76 float_array_assignment ( (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
/* ...and copy the same bytes so the NEON side gets identical input. */
77 memcpy ( (ne10_float32_t *) vertices_neon, (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t));
/* Random inputs for the transform xf. */
/* NOTE(review): tmp presumably feeds xf.c2 via sin/cos as in the performance test - confirm. */
79 ne10_float32_t tmp = (ne10_float32_t) (drand48() * 64.0f - 32.0f);
80 xf.c1.r1 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
81 xf.c1.r2 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
/* Regression build: check every vertex_count from 1 up to TEST_LENGTH_SAMPLES - 1. */
85 #if defined (REGRESSION_TEST)
86 for (vertex_count = 1; vertex_count < TEST_LENGTH_SAMPLES; vertex_count++)
91 ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count);
92 printf (
"----vertex_count %d\n", vertex_count);
/* Compare the two bounding boxes as flat arrays of vec_size floats. */
93 assert_float_vec_equal ( (ne10_float32_t*) &aabb_c, (ne10_float32_t*) &aabb_neon, ERROR_MARGIN_LARGE, vec_size);
/* Any other build: same check, but only every third vertex_count starting at 1. */
95 #else // defined (SMOKE_TEST)
96 for (vertex_count = 1; vertex_count < TEST_LENGTH_SAMPLES; vertex_count += 3)
101 ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count);
102 printf (
"----vertex_count %d\n", vertex_count);
103 assert_float_vec_equal ( (ne10_float32_t*) &aabb_c, (ne10_float32_t*) &aabb_neon, ERROR_MARGIN_LARGE, vec_size);
/* Release the NEON-side vertex buffer. */
107 free (vertices_neon);
/*
 * Performance test for compute_aabb_vec2f.
 *
 * Prints a table header, prepares random vertices and a transform, then for
 * vertex_count = 4, 8, ... (below TEST_LENGTH_SAMPLES) runs loops of
 * TEST_COUNT iterations and prints vertex_count, time_c and time_neon.
 */
111 void test_compute_aabb_vec2f_performance()
115 ne10_mat2x2f_t aabb_c;
118 ne10_int32_t vertex_count;
121 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Column titles for the results table. */
122 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"vertex count",
"C Time in ms",
"NEON Time in ms",
"Time Savings",
"Performance Ratio");
/* Randomise the C-side vertices and copy the same bytes to the NEON-side buffer. */
127 float_array_assignment ( (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
128 memcpy ( (ne10_float32_t *) vertices_neon, (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t));
/* Random transform: c1 gets two random values, c2 gets sin/cos of the random value tmp. */
130 ne10_float32_t tmp = (ne10_float32_t) (drand48() * 64.0f - 32.0f);
131 xf.c1.r1 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
132 xf.c1.r2 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
133 xf.c2.r1 = sin (tmp);
134 xf.c2.r2 = cos (tmp);
/* Only multiples of 4 are measured. */
136 for (vertex_count = 4; vertex_count < TEST_LENGTH_SAMPLES; vertex_count += 4)
/* NOTE(review): this loop presumably times TEST_COUNT calls of the C implementation into aabb_c - confirm. */
142 for (i = 0; i < TEST_COUNT; i++)
/* NEON measurement, compiled only when the NEON variant is enabled. */
147 #ifdef ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
149 ne10_mat2x2f_t aabb_neon;
153 for (i = 0; i < TEST_COUNT; i++)
154 ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count);
157 #endif // ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
/* NOTE(review): these statistics sit after the #endif, so they are also computed when the NEON path is compiled out and time_neon was not refreshed - confirm this is intended. */
158 time_speedup = (ne10_float32_t) time_c / time_neon;
159 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
/* NOTE(review): "vertax" in the message looks like a typo for "vertex"; %lld assumes ne10_int64_t is long long - confirm. */
160 printf (
"vertax count: %10d time C: %10lld time NEON: %10lld\n", vertex_count, time_c, time_neon);
/* Release the NEON-side vertex buffer. */
164 free (vertices_neon);
/*
 * Conformance test for ne10_physics_relative_v_vec2f_neon.
 *
 * Random inputs (v_wa, v_wb, ra, rb) are generated once. For a series of
 * counts the NEON routine writes dv_neon, the guard regions around both
 * output buffers are checked, and dv_c / dv_neon are compared element by
 * element with assert_float_vec_equal using ERROR_MARGIN_LARGE.
 * NOTE(review): dv_c is presumably filled by the C reference implementation
 * on the same inputs - confirm.
 */
167 void test_relative_v_vec2f_conformance()
/* The test body is only compiled when the NEON variant is enabled. */
169 #if defined ENABLE_NE10_PHYSICS_RELATIVE_V_VEC2F_NEON
/* Number of floats in one ne10_vec2f_t (the output element type). */
176 ne10_int32_t vec_size =
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t);
178 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Random inputs: v_wa / v_wb are filled as TEST_LENGTH_SAMPLES vec3f's, ra / rb as vec2f's. */
185 float_array_assignment ( (ne10_float32_t *) v_wa, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) /
sizeof (ne10_float32_t));
186 float_array_assignment ( (ne10_float32_t *) v_wb, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) /
sizeof (ne10_float32_t));
187 float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
188 float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
/* Output buffers: TEST_LENGTH_SAMPLES vec2f's plus 2 * ARRAY_GUARD_LEN extra floats of guard space. */
191 guarded_dv_c = (
ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
/* The doubled "+ +" below is a binary plus followed by a unary plus; the size is the same as above. */
192 guarded_dv_neon = (
ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) + + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
/* The usable output starts ARRAY_GUARD_LEN floats into each allocation. */
193 dv_c = (
ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_c + ARRAY_GUARD_LEN);
194 dv_neon = (
ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_neon + ARRAY_GUARD_LEN);
/* Regression build: check every count from 1 up to TEST_LENGTH_SAMPLES - 1. */
196 #if defined (REGRESSION_TEST)
197 for (count = 1; count < TEST_LENGTH_SAMPLES; count++)
/* NOTE(review): GUARD_ARRAY / CHECK_ARRAY_GUARD presumably write and later verify sentinels around the first count * vec_size floats - confirm against their definitions. */
199 GUARD_ARRAY ( (ne10_float32_t*) dv_c, count * vec_size);
200 GUARD_ARRAY ( (ne10_float32_t*) dv_neon, count * vec_size);
205 ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count);
207 CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_c, count * vec_size);
208 CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_neon, count * vec_size);
209 printf (
"----count %d\n", count);
/* Compare each output vector (vec_size floats) of the two result buffers. */
210 for (i = 0; i < count; i++)
211 assert_float_vec_equal ( (ne10_float32_t*) &dv_c[i], (ne10_float32_t*) &dv_neon[i], ERROR_MARGIN_LARGE, vec_size);
/* Any other build: same check, but only every fifth count starting at 1. */
213 #else // defined (SMOKE_TEST)
214 for (count = 1; count < TEST_LENGTH_SAMPLES; count += 5)
216 GUARD_ARRAY ( (ne10_float32_t*) dv_c, count * vec_size);
217 GUARD_ARRAY ( (ne10_float32_t*) dv_neon, count * vec_size);
222 ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count);
224 CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_c, count * vec_size);
225 CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_neon, count * vec_size);
226 printf (
"----count %d\n", count);
227 for (i = 0; i < count; i++)
228 assert_float_vec_equal ( (ne10_float32_t*) &dv_c[i], (ne10_float32_t*) &dv_neon[i], ERROR_MARGIN_LARGE, vec_size);
/* Free the allocation base pointer, not the offset dv_neon pointer. */
236 free (guarded_dv_neon);
/*
 * Performance test for relative_v_vec2f.
 *
 * Prints a table header, prepares random inputs and guarded output buffers,
 * then for count = 2, 6, 10, ... (below TEST_LENGTH_SAMPLES) runs loops of
 * TEST_COUNT iterations and, when the NEON variant is enabled, prints count,
 * time_c and time_neon.
 */
240 void test_relative_v_vec2f_performance()
250 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Column titles for the results table. */
251 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"count",
"C Time in ms",
"NEON Time in ms",
"Time Savings",
"Performance Ratio");
/* Random inputs: v_wa / v_wb are filled as TEST_LENGTH_SAMPLES vec3f's, ra / rb as vec2f's. */
258 float_array_assignment ( (ne10_float32_t *) v_wa, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) /
sizeof (ne10_float32_t));
259 float_array_assignment ( (ne10_float32_t *) v_wb, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) /
sizeof (ne10_float32_t));
260 float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
261 float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
/* Output buffers: TEST_LENGTH_SAMPLES vec2f's plus 2 * ARRAY_GUARD_LEN extra floats of guard space. */
264 guarded_dv_c = (
ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
/* The doubled "+ +" below is a binary plus followed by a unary plus; the size is the same as above. */
265 guarded_dv_neon = (
ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) + + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
/* The usable output starts ARRAY_GUARD_LEN floats into each allocation. */
266 dv_c = (
ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_c + ARRAY_GUARD_LEN);
267 dv_neon = (
ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_neon + ARRAY_GUARD_LEN);
/* Measured counts start at 2 and advance in steps of 4. */
269 for (count = 2; count < TEST_LENGTH_SAMPLES; count += 4)
/* NOTE(review): this loop presumably times TEST_COUNT calls of the C implementation into dv_c - confirm. */
275 for (i = 0; i < TEST_COUNT; i++)
/* NEON measurement and reporting, compiled only when the NEON variant is enabled. */
279 #ifdef ENABLE_NE10_PHYSICS_RELATIVE_V_VEC2F_NEON
284 for (i = 0; i < TEST_COUNT; i++)
285 ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count);
/* Here the statistics and the printout are inside the #ifdef, unlike the aabb and apply_impulse performance tests. */
288 time_speedup = (ne10_float32_t) time_c / time_neon;
289 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
/* NOTE(review): %lld assumes ne10_int64_t is long long - confirm. */
290 printf (
"count: %10d time C: %10lld time NEON: %10lld\n", count, time_c, time_neon);
292 #endif // ENABLE_NE10_PHYSICS_RELATIVE_V_VEC2F_NEON
/* Free the allocation base pointer, not the offset dv_neon pointer. */
300 free (guarded_dv_neon);
/*
 * Conformance test for ne10_physics_apply_impulse_vec2f_neon.
 *
 * Random inputs (ra, rb, ima, imb, p) are generated once. The in/out buffers
 * v_wa / v_wb exist in a C copy and a NEON copy that start out identical.
 * For a series of counts the NEON routine updates its copies, the guard
 * regions around all four buffers are checked, and the C and NEON copies are
 * compared element by element with assert_float_vec_equal.
 * NOTE(review): v_wa_c / v_wb_c are presumably updated by the C reference
 * implementation with the same inputs - confirm.
 */
303 void test_apply_impulse_vec2f_conformance()
/* The test body is only compiled when the NEON variant is enabled. */
305 #if defined ENABLE_NE10_PHYSICS_APPLY_IMPULSE_VEC2F_NEON
/* Base pointers of the guarded allocations; these are the ones passed to free(). */
306 ne10_vec3f_t *guarded_v_wa_c, *guarded_v_wa_neon, *guarded_v_wb_c, *guarded_v_wb_neon;
/* Number of floats in one ne10_vec3f_t (the in/out element type). */
311 ne10_int32_t vec_size =
sizeof (
ne10_vec3f_t) /
sizeof (ne10_float32_t);
313 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Random read-only inputs, each filled as TEST_LENGTH_SAMPLES vec2f's worth of floats. */
321 float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
322 float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
323 float_array_assignment ( (ne10_float32_t *) ima, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
324 float_array_assignment ( (ne10_float32_t *) imb, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
325 float_array_assignment ( (ne10_float32_t *) p, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
/* In/out buffers: TEST_LENGTH_SAMPLES vec3f's plus 2 * ARRAY_GUARD_LEN extra floats of guard space each. */
/* The doubled "+ +" in two of the size expressions is a binary plus followed by a unary plus; all four sizes are equal. */
328 guarded_v_wa_c = (
ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
329 guarded_v_wa_neon = (
ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
330 guarded_v_wb_c = (
ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
331 guarded_v_wb_neon = (
ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
/* The usable data starts ARRAY_GUARD_LEN floats into each allocation. */
332 v_wa_c = (
ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_c + ARRAY_GUARD_LEN);
333 v_wa_neon = (
ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_neon + ARRAY_GUARD_LEN);
334 v_wb_c = (
ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_c + ARRAY_GUARD_LEN);
335 v_wb_neon = (
ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_neon + ARRAY_GUARD_LEN);
/* Randomise the C copies, then duplicate them so the NEON copies start from the same values. */
336 float_array_assignment ( (ne10_float32_t *) v_wa_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) /
sizeof (ne10_float32_t));
337 float_array_assignment ( (ne10_float32_t *) v_wb_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) /
sizeof (ne10_float32_t));
338 memcpy (v_wa_neon, v_wa_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t));
339 memcpy (v_wb_neon, v_wb_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t));
/* Regression build: check every count from 1 up to TEST_LENGTH_SAMPLES - 1. */
341 #if defined (REGRESSION_TEST)
342 for (count = 1; count < TEST_LENGTH_SAMPLES; count++)
/* NOTE(review): GUARD_ARRAY / CHECK_ARRAY_GUARD presumably write and later verify sentinels around the first count * vec_size floats - confirm against their definitions. */
344 GUARD_ARRAY ( (ne10_float32_t*) v_wa_c, count * vec_size);
345 GUARD_ARRAY ( (ne10_float32_t*) v_wa_neon, count * vec_size);
346 GUARD_ARRAY ( (ne10_float32_t*) v_wb_c, count * vec_size);
347 GUARD_ARRAY ( (ne10_float32_t*) v_wb_neon, count * vec_size);
352 ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count);
354 CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_c, count * vec_size);
355 CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_neon, count * vec_size);
356 CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_c, count * vec_size);
357 CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_neon, count * vec_size);
359 printf (
"----count %d\n", count);
/* Compare each vec3f of both in/out buffers between the C and NEON copies. */
360 for (i = 0; i < count; i++)
362 assert_float_vec_equal ( (ne10_float32_t*) &v_wa_c[i], (ne10_float32_t*) &v_wa_neon[i], ERROR_MARGIN_LARGE, vec_size);
363 assert_float_vec_equal ( (ne10_float32_t*) &v_wb_c[i], (ne10_float32_t*) &v_wb_neon[i], ERROR_MARGIN_LARGE, vec_size);
/* Any other build: same check, but only every fifth count starting at 1. */
366 #else // defined (SMOKE_TEST)
367 for (count = 1; count < TEST_LENGTH_SAMPLES; count += 5)
369 GUARD_ARRAY ( (ne10_float32_t*) v_wa_c, count * vec_size);
370 GUARD_ARRAY ( (ne10_float32_t*) v_wa_neon, count * vec_size);
371 GUARD_ARRAY ( (ne10_float32_t*) v_wb_c, count * vec_size);
372 GUARD_ARRAY ( (ne10_float32_t*) v_wb_neon, count * vec_size);
377 ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count);
379 CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_c, count * vec_size);
380 CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_neon, count * vec_size);
381 CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_c, count * vec_size);
382 CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_neon, count * vec_size);
383 printf (
"----count %d\n", count);
384 for (i = 0; i < count; i++)
386 assert_float_vec_equal ( (ne10_float32_t*) &v_wa_c[i], (ne10_float32_t*) &v_wa_neon[i], ERROR_MARGIN_LARGE, vec_size);
387 assert_float_vec_equal ( (ne10_float32_t*) &v_wb_c[i], (ne10_float32_t*) &v_wb_neon[i], ERROR_MARGIN_LARGE, vec_size);
/* Free the allocation base pointers, not the offset working pointers. */
396 free (guarded_v_wa_c);
397 free (guarded_v_wa_neon);
398 free (guarded_v_wb_c);
399 free (guarded_v_wb_neon);
/*
 * Performance test for apply_impulse_vec2f.
 *
 * Prints a table header, prepares random inputs and guarded in/out buffers,
 * then for count = 2, 6, 10, ... (below TEST_LENGTH_SAMPLES) runs loops of
 * TEST_COUNT iterations and prints count, time_c and time_neon.
 */
403 void test_apply_impulse_vec2f_performance()
/* Base pointers of the guarded allocations; these are the ones passed to free(). */
405 ne10_vec3f_t *guarded_v_wa_c, *guarded_v_wa_neon, *guarded_v_wb_c, *guarded_v_wb_neon;
412 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Column titles for the results table. */
413 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"count",
"C Time in ms",
"NEON Time in ms",
"Time Savings",
"Performance Ratio");
/* Random read-only inputs, each filled as TEST_LENGTH_SAMPLES vec2f's worth of floats. */
421 float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
422 float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
423 float_array_assignment ( (ne10_float32_t *) ima, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
424 float_array_assignment ( (ne10_float32_t *) imb, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
425 float_array_assignment ( (ne10_float32_t *) p, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec2f_t) /
sizeof (ne10_float32_t));
/* In/out buffers: TEST_LENGTH_SAMPLES vec3f's plus 2 * ARRAY_GUARD_LEN extra floats of guard space each. */
/* The doubled "+ +" in two of the size expressions is a binary plus followed by a unary plus; all four sizes are equal. */
428 guarded_v_wa_c = (
ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
429 guarded_v_wa_neon = (
ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
430 guarded_v_wb_c = (
ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
431 guarded_v_wb_neon = (
ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 *
sizeof (ne10_float32_t));
/* The usable data starts ARRAY_GUARD_LEN floats into each allocation. */
432 v_wa_c = (
ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_c + ARRAY_GUARD_LEN);
433 v_wa_neon = (
ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_neon + ARRAY_GUARD_LEN);
434 v_wb_c = (
ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_c + ARRAY_GUARD_LEN);
435 v_wb_neon = (
ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_neon + ARRAY_GUARD_LEN);
/* Randomise the C copies, then duplicate them so the NEON copies start from the same values. */
436 float_array_assignment ( (ne10_float32_t *) v_wa_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) /
sizeof (ne10_float32_t));
437 float_array_assignment ( (ne10_float32_t *) v_wb_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t) /
sizeof (ne10_float32_t));
438 memcpy (v_wa_neon, v_wa_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t));
439 memcpy (v_wb_neon, v_wb_c, TEST_LENGTH_SAMPLES *
sizeof (
ne10_vec3f_t));
/* Measured counts start at 2 and advance in steps of 4. */
441 for (count = 2; count < TEST_LENGTH_SAMPLES; count += 4)
/* NOTE(review): this loop presumably times TEST_COUNT calls of the C implementation on v_wa_c / v_wb_c - confirm. */
447 for (i = 0; i < TEST_COUNT; i++)
/* NEON measurement, compiled only when the NEON variant is enabled. */
452 #ifdef ENABLE_NE10_PHYSICS_APPLY_IMPULSE_VEC2F_NEON
457 for (i = 0; i < TEST_COUNT; i++)
458 ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count);
461 #endif // ENABLE_NE10_PHYSICS_APPLY_IMPULSE_VEC2F_NEON
/* NOTE(review): these statistics sit after the #endif, so they are also computed when the NEON path is compiled out and time_neon was not refreshed - confirm this is intended. */
462 time_speedup = (ne10_float32_t) time_c / time_neon;
463 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
/* NOTE(review): %lld assumes ne10_int64_t is long long - confirm. */
464 printf (
"count: %10d time C: %10lld time NEON: %10lld\n", count, time_c, time_neon);
/* Free the allocation base pointers, not the offset working pointers. */
473 free (guarded_v_wa_c);
474 free (guarded_v_wa_neon);
475 free (guarded_v_wb_c);
476 free (guarded_v_wb_neon);
/*
 * Entry point for the compute_aabb_vec2f tests.
 *
 * Runs the conformance test in SMOKE_TEST or REGRESSION_TEST builds and the
 * performance test in PERFORMANCE_TEST builds.
 *
 * `defined` is applied to both macros: the earlier form
 * `defined (SMOKE_TEST)||(REGRESSION_TEST)` evaluated REGRESSION_TEST by
 * value, which fails to preprocess when the macro is defined with an empty
 * body and is false when it is defined as 0.
 */
void test_compute_aabb_vec2f()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_compute_aabb_vec2f_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_compute_aabb_vec2f_performance();
#endif
}
/*
 * Entry point for the relative_v_vec2f tests.
 *
 * Runs the conformance test in SMOKE_TEST or REGRESSION_TEST builds and the
 * performance test in PERFORMANCE_TEST builds.
 *
 * `defined` is applied to both macros: the earlier form
 * `defined (SMOKE_TEST)||(REGRESSION_TEST)` evaluated REGRESSION_TEST by
 * value, which fails to preprocess when the macro is defined with an empty
 * body and is false when it is defined as 0.
 */
void test_relative_v_vec2f()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_relative_v_vec2f_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_relative_v_vec2f_performance();
#endif
}
/*
 * Entry point for the apply_impulse_vec2f tests.
 *
 * Runs the conformance test in SMOKE_TEST or REGRESSION_TEST builds and the
 * performance test in PERFORMANCE_TEST builds.
 *
 * `defined` is applied to both macros: the earlier form
 * `defined (SMOKE_TEST)||(REGRESSION_TEST)` evaluated REGRESSION_TEST by
 * value, which fails to preprocess when the macro is defined with an empty
 * body and is false when it is defined as 0.
 */
void test_apply_impulse_vec2f()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_apply_impulse_vec2f_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_apply_impulse_vec2f_performance();
#endif
}
/* Setup hook; registered through fixture_setup() in test_fixture_physics(). */
512 void my_test_setup (
void)
/* Teardown hook; registered through fixture_teardown() in test_fixture_physics(). */
517 void my_test_teardown (
void)
/*
 * Physics test fixture: starts the fixture, registers the setup and teardown
 * hooks, then runs the three physics test entry points in order.
 */
522 void test_fixture_physics (
void)
524 test_fixture_start();
/* Register the hooks defined above with the test framework. */
526 fixture_setup (my_test_setup);
527 fixture_teardown (my_test_teardown);
/* Each entry point selects conformance and/or performance runs via build macros. */
529 run_test (test_compute_aabb_vec2f);
530 run_test (test_relative_v_vec2f);
531 run_test (test_apply_impulse_vec2f);