best libm time for 1000 calls: 3.06011e-06 seconds (792544)
Hmm... that time suggests 6 cycles for an atan, which I think is not
reasonable. I probably got defeated by a compiler optimization here
somewhere...
Alrighty, once the compiler is defeated, we see that the speed
improvement is there:
ollmia:/tmp iano$ gcc -O3 main4.c -Wmost
ollmia:/tmp iano$ ./a.out
best libm time for 1000 calls: 0.000113 seconds (801939)
best cheesy time for 1000 calls: 0.000017 seconds (800454)
/* Benchmark atan2f(): run 1000 timed passes of 1000 calls each and keep
 * the shortest pass time seen. */
bestTime = 1e20;   /* start high so a measured pass can replace it */
for( i = 0; i < 1000; i++ )
{
    startTime = mach_absolute_time();
    for( j = 0; j < 1000; j++ )
    {
        sum += atan2f( x[j], y[j] );
    }
    endTime = mach_absolute_time();

    /* Keep whichever is smaller: this pass or the best so far. */
    currentTime = MySubtractTime( endTime, startTime );
    bestTime = ( currentTime < bestTime ) ? currentTime : bestTime;
}
/* The accumulated sum is printed alongside the time; presumably this keeps
 * the compiler from discarding the calls -- confirm against the full file. */
printf( "best libm time for 1000 calls: %f seconds (%g)\n", bestTime, sum );
/* Benchmark arctan2(): same procedure as a plain best-of-1000 timing run,
 * with 1000 calls per timed pass. */
sum = 0;           /* restart the accumulator for this measurement */
bestTime = 1e20;   /* start high so a measured pass can replace it */
for( i = 0; i < 1000; i++ )
{
    startTime = mach_absolute_time();
    for( j = 0; j < 1000; j++ )
    {
        sum += arctan2( x[j], y[j] );
    }
    endTime = mach_absolute_time();

    /* Keep whichever is smaller: this pass or the best so far. */
    currentTime = MySubtractTime( endTime, startTime );
    bestTime = ( currentTime < bestTime ) ? currentTime : bestTime;
}
/* Report the shortest pass and the accumulated sum of results. */
printf( "best cheesy time for 1000 calls: %f seconds (%g)\n", bestTime, sum );
//some cheesy error checking to go with the cheesy function
//please note that this is VERY cheesy and should in no way be construed
//to be even hinting at an endorsement of anything.
for( j = 0; j < 1000; j++ )
{
float test = arctan2(x[j], y[j] );
float correct = atan2f( x[j], y[j] );
double currentError = correct - test;
if( currentError > maxError )
{
printf( "atan2(%g, %g): %g %g %g\n", x[j],
y[j], correct, test, currentError );
maxError = currentError;
}