mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-23 07:39:01 +00:00
Synchronise with Khronos-private GitLab branch
The maintenance of the conformance tests is moving to GitHub. This commit contains all the changes that have been done in GitLab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
This commit is contained in:
@@ -23,6 +23,10 @@ set(${MODULE_NAME}_SOURCES
|
||||
../../test_common/harness/ThreadPool.c
|
||||
../../test_common/harness/mt19937.c
|
||||
../../test_common/harness/msvc9.c
|
||||
../../test_common/harness/kernelHelpers.c
|
||||
../../test_common/harness/errorHelpers.c
|
||||
../../test_common/harness/testHarness.c
|
||||
../../test_common/harness/parseParameters.cpp
|
||||
)
|
||||
|
||||
|
||||
@@ -46,26 +50,7 @@ set_source_files_properties(
|
||||
endif(NOT ANDROID)
|
||||
|
||||
set_source_files_properties(
|
||||
FunctionList.c
|
||||
Sleep.c
|
||||
binary.c
|
||||
binaryOperator.c
|
||||
Utility.c
|
||||
binary_i.c
|
||||
binary_two_results_i.c
|
||||
i_unary.c
|
||||
macro_binary.c
|
||||
macro_unary.c
|
||||
mad.c
|
||||
main.c
|
||||
reference_math.c
|
||||
ternary.c
|
||||
unary.c
|
||||
unary_two_results.c
|
||||
unary_two_results_i.c unary_u.c
|
||||
../../test_common/harness/rounding_mode.c
|
||||
../../test_common/harness/ThreadPool.c
|
||||
../../test_common/harness/msvc9.c
|
||||
${MODULE_NAME}_SOURCES
|
||||
PROPERTIES LANGUAGE CXX)
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
|
||||
@@ -85,7 +85,7 @@ typedef struct Func
|
||||
float relaxed_error;
|
||||
int ftz;
|
||||
int relaxed;
|
||||
const ::vtbl *vtbl;
|
||||
const vtbl *vtbl_ptr;
|
||||
}Func;
|
||||
|
||||
|
||||
|
||||
@@ -26,11 +26,7 @@
|
||||
#include <stdio.h>
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
#include "../../test_common/harness/fpcontrol.h"
|
||||
|
||||
#if defined( _WIN32) && defined (_MSC_VER)
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/ThreadPool.h"
|
||||
#define BUFFER_SIZE (1024*1024*2)
|
||||
|
||||
@@ -112,7 +108,7 @@ extern "C" {
|
||||
float Abs_Error( float test, double reference );
|
||||
float Ulp_Error( float test, double reference );
|
||||
//float Ulp_Error_Half( float test, double reference );
|
||||
float Ulp_Error_Double( double test, long double reference );
|
||||
float Bruteforce_Ulp_Error_Double( double test, long double reference );
|
||||
#ifdef __cplusplus
|
||||
} //extern "C"
|
||||
#endif
|
||||
|
||||
@@ -233,6 +233,7 @@ typedef struct TestInfo
|
||||
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
|
||||
ThreadInfo *tinfo; // An array of thread specific information for each worker thread
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
float ulps; // max_allowed ulps
|
||||
@@ -268,6 +269,16 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
|
||||
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
|
||||
}
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
@@ -341,7 +352,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
|
||||
// Run the kernels
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for( i = 0; i < test_info.threadCount; i++ )
|
||||
@@ -991,6 +1002,16 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
|
||||
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
|
||||
}
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
@@ -1063,7 +1084,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for( i = 0; i < test_info.threadCount; i++ )
|
||||
@@ -1359,7 +1380,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
cl_double test = ((cl_double*) q)[j];
|
||||
long double correct = func.f_ff( s[j], s2[j] );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
float err = Bruteforce_Ulp_Error_Double( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
if( fail && ftz )
|
||||
@@ -1399,8 +1420,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
long double correct2 = func.f_ff( 0.0, s2[j] );
|
||||
long double correct3 = func.f_ff( -0.0, s2[j] );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
@@ -1422,10 +1443,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
correct3 = func.f_ff( -0.0, 0.0 );
|
||||
long double correct4 = func.f_ff( 0.0, -0.0 );
|
||||
long double correct5 = func.f_ff( -0.0, -0.0 );
|
||||
err2 = Ulp_Error_Double( test, correct2 );
|
||||
err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Ulp_Error_Double( test, correct5 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
|
||||
(!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
@@ -1451,8 +1472,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
long double correct2 = func.f_ff( s[j], 0.0 );
|
||||
long double correct3 = func.f_ff( s[j], -0.0 );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
|
||||
@@ -207,6 +207,7 @@ typedef struct TestInfo
|
||||
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
|
||||
ThreadInfo *tinfo; // An array of thread specific information for each worker thread
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
float ulps; // max_allowed ulps
|
||||
@@ -260,6 +261,16 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
}
|
||||
|
||||
test_info.step = test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
@@ -329,7 +340,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for( i = 0; i < test_info.threadCount; i++ )
|
||||
@@ -501,63 +512,51 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
{ // test edge cases
|
||||
float *fp = (float *)p;
|
||||
float *fp2 = (float *)p2;
|
||||
|
||||
if( job_id <= (cl_uint)indx ) {
|
||||
// Insert special values
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
fp[j] = specialValuesFloat[x];
|
||||
fp2[j] = specialValuesFloat[y];
|
||||
if( ++x >= specialValuesFloatCount )
|
||||
{
|
||||
for( ; j < buffer_elements; j++ ) {
|
||||
p[j] = ((cl_uint *)specialValuesFloat)[x];
|
||||
p2[j] = ((cl_uint *)specialValuesFloat)[y];
|
||||
++x;
|
||||
if (x >= specialValuesFloatCount) {
|
||||
x = 0;
|
||||
y++;
|
||||
if( y >= specialValuesFloatCount )
|
||||
if (y >= specialValuesFloatCount)
|
||||
break;
|
||||
}
|
||||
if(gTestFastRelaxed && strcmp(name,"divide") == 0 )
|
||||
{
|
||||
float fpj = *(float*)&fp[j];
|
||||
float fpj2 = *(float*)&fp2[j];
|
||||
if(fabs(fpj) > 0x5E800000 ) //[2^-62,2^62]
|
||||
{
|
||||
fp[j] = NAN;
|
||||
}
|
||||
if( fabs(fpj2) > 0x5E800000 ) //[2^-62,2^62]
|
||||
{
|
||||
fp2[j] = NAN;
|
||||
}
|
||||
if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
|
||||
cl_uint pj = p[j] & 0x7fffffff;
|
||||
cl_uint p2j = p2[j] & 0x7fffffff;
|
||||
// Replace values outside [2^-62, 2^62] with QNaN
|
||||
if (pj < 0x20800000 || pj > 0x5e800000)
|
||||
p[j] = 0x7fc00000;
|
||||
if (p2j < 0x20800000 || p2j > 0x5e800000)
|
||||
p2[j] = 0x7fc00000;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Init any remaining values.
|
||||
// Init any remaining values.
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
|
||||
if(gTestFastRelaxed)
|
||||
{
|
||||
if( strcmp(name,"divide")==0){
|
||||
float pj = *(float*)&p[j];
|
||||
float pj2 = *(float*)&p2[j];
|
||||
if(fabs(pj) > 0x5E800000 ) //[2^-62,2^62]
|
||||
{
|
||||
p[j] = NAN;
|
||||
}
|
||||
if( fabs(pj2) > 0x5E800000 ) //[2^-62,2^62]
|
||||
{
|
||||
p2[j] = NAN;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
|
||||
cl_uint pj = p[j] & 0x7fffffff;
|
||||
cl_uint p2j = p2[j] & 0x7fffffff;
|
||||
// Replace values outside [2^-62, 2^62] with QNaN
|
||||
if (pj < 0x20800000 || pj > 0x5e800000)
|
||||
p[j] = 0x7fc00000;
|
||||
if (p2j < 0x20800000 || p2j > 0x5e800000)
|
||||
p2[j] = 0x7fc00000;
|
||||
}
|
||||
}
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
@@ -950,6 +949,16 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
}
|
||||
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
@@ -1020,7 +1029,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for( i = 0; i < test_info.threadCount; i++ )
|
||||
@@ -1315,7 +1324,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
cl_double test = ((cl_double*) q)[j];
|
||||
long double correct = func.f_ff( s[j], s2[j] );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
float err = Bruteforce_Ulp_Error_Double( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
if( fail && ftz )
|
||||
@@ -1334,8 +1343,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
long double correct2 = func.f_ff( 0.0, s2[j] );
|
||||
long double correct3 = func.f_ff( -0.0, s2[j] );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
@@ -1357,10 +1366,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
correct3 = func.f_ff( -0.0, 0.0 );
|
||||
long double correct4 = func.f_ff( 0.0, -0.0 );
|
||||
long double correct5 = func.f_ff( -0.0, -0.0 );
|
||||
err2 = Ulp_Error_Double( test, correct2 );
|
||||
err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Ulp_Error_Double( test, correct5 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
|
||||
(!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
@@ -1386,8 +1395,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
long double correct2 = func.f_ff( s[j], 0.0 );
|
||||
long double correct3 = func.f_ff( s[j], -0.0 );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
|
||||
@@ -230,6 +230,7 @@ typedef struct TestInfo
|
||||
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
|
||||
ThreadInfo *tinfo; // An array of thread specific information for each worker thread
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
float ulps; // max_allowed ulps
|
||||
@@ -262,6 +263,16 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
|
||||
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
|
||||
}
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
@@ -330,7 +341,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
|
||||
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
@@ -758,6 +769,16 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
|
||||
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
|
||||
}
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
@@ -831,7 +852,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
|
||||
|
||||
// Run the kernels
|
||||
if( !gSkipCorrectnessTesting )
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
|
||||
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
@@ -1128,7 +1149,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
cl_double test = ((cl_double*) q)[j];
|
||||
long double correct = func.f_fi( s[j], s2[j] );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
float err = Bruteforce_Ulp_Error_Double( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
if( fail && ftz )
|
||||
@@ -1146,8 +1167,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
long double correct2 = func.f_fi( 0.0, s2[j] );
|
||||
long double correct3 = func.f_fi( -0.0, s2[j] );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
|
||||
@@ -871,7 +871,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
|
||||
double test = ((double*) q)[j];
|
||||
int correct2 = INT_MIN;
|
||||
long double correct = f->dfunc.f_ffpI( s[j], s2[j], &correct2 );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
float err = Bruteforce_Ulp_Error_Double( test, correct );
|
||||
int64_t iErr;
|
||||
|
||||
// in case of remquo, we only care about the sign and last seven bits of
|
||||
@@ -907,8 +907,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
|
||||
int correct3i, correct4i;
|
||||
long double correct3 = f->dfunc.f_ffpI( 0.0, s2[j], &correct3i );
|
||||
long double correct4 = f->dfunc.f_ffpI( -0.0, s2[j], &correct4i );
|
||||
float err2 = Ulp_Error_Double( test, correct3 );
|
||||
float err3 = Ulp_Error_Double( test, correct4 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
|
||||
int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
|
||||
@@ -937,10 +937,10 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
|
||||
correct4 = f->dfunc.f_ffpI( -0.0, 0.0, &correct4i );
|
||||
long double correct7 = f->dfunc.f_ffpI( 0.0, -0.0, &correct7i );
|
||||
long double correct8 = f->dfunc.f_ffpI( -0.0, -0.0, &correct8i );
|
||||
err2 = Ulp_Error_Double( test, correct3 );
|
||||
err3 = Ulp_Error_Double( test, correct4 );
|
||||
float err4 = Ulp_Error_Double( test, correct7 );
|
||||
float err5 = Ulp_Error_Double( test, correct8 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
float err4 = Bruteforce_Ulp_Error_Double( test, correct7 );
|
||||
float err5 = Bruteforce_Ulp_Error_Double( test, correct8 );
|
||||
iErr3 = (long long) q2[j] - (long long) correct3i;
|
||||
iErr4 = (long long) q2[j] - (long long) correct4i;
|
||||
int64_t iErr7 = (long long) q2[j] - (long long) correct7i;
|
||||
@@ -979,8 +979,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
|
||||
int correct3i, correct4i;
|
||||
long double correct3 = f->dfunc.f_ffpI( s[j], 0.0, &correct3i );
|
||||
long double correct4 = f->dfunc.f_ffpI( s[j], -0.0, &correct4i );
|
||||
float err2 = Ulp_Error_Double( test, correct3 );
|
||||
float err3 = Ulp_Error_Double( test, correct4 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
|
||||
int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
|
||||
|
||||
@@ -222,6 +222,7 @@ typedef struct TestInfo
|
||||
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
|
||||
ThreadInfo *tinfo; // An array of thread specific information for each worker thread
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
@@ -249,6 +250,16 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
|
||||
}
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
@@ -319,7 +330,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
// Run the kernels
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
@@ -749,6 +760,16 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
}
|
||||
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
@@ -820,7 +841,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
@@ -193,6 +193,7 @@ typedef struct TestInfo
|
||||
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
|
||||
ThreadInfo *tinfo; // An array of thread specific information for each worker thread
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
@@ -220,6 +221,16 @@ int TestMacro_Int_Float(const Func *f, MTdata d)
|
||||
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
|
||||
}
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for every thread
|
||||
@@ -279,7 +290,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d)
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
@@ -602,6 +613,16 @@ int TestMacro_Int_Double(const Func *f, MTdata d)
|
||||
}
|
||||
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
@@ -664,7 +685,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d)
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
@@ -785,7 +785,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
|
||||
{
|
||||
double test = ((double*) q)[j];
|
||||
long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
float err = Bruteforce_Ulp_Error_Double( test, correct );
|
||||
int fail = ! (fabsf(err) <= f->double_ulps);
|
||||
|
||||
if( fail && ftz )
|
||||
@@ -803,8 +803,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
|
||||
{ // look at me,
|
||||
long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
|
||||
long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
@@ -826,10 +826,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
|
||||
correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
|
||||
long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
|
||||
long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
|
||||
err2 = Ulp_Error_Double( test, correct2 );
|
||||
err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Ulp_Error_Double( test, correct5 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
|
||||
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
@@ -860,14 +860,14 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
|
||||
long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
|
||||
long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
|
||||
long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
|
||||
err2 = Ulp_Error_Double( test, correct2 );
|
||||
err3 = Ulp_Error_Double( test, correct3 );
|
||||
err4 = Ulp_Error_Double( test, correct4 );
|
||||
err5 = Ulp_Error_Double( test, correct5 );
|
||||
float err6 = Ulp_Error_Double( test, correct6 );
|
||||
float err7 = Ulp_Error_Double( test, correct7 );
|
||||
float err8 = Ulp_Error_Double( test, correct8 );
|
||||
float err9 = Ulp_Error_Double( test, correct9 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
|
||||
float err6 = Bruteforce_Ulp_Error_Double( test, correct6 );
|
||||
float err7 = Bruteforce_Ulp_Error_Double( test, correct7 );
|
||||
float err8 = Bruteforce_Ulp_Error_Double( test, correct8 );
|
||||
float err9 = Bruteforce_Ulp_Error_Double( test, correct9 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
|
||||
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
|
||||
(!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
|
||||
@@ -907,10 +907,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
|
||||
correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
|
||||
long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 );
|
||||
long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
|
||||
err2 = Ulp_Error_Double( test, correct2 );
|
||||
err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Ulp_Error_Double( test, correct5 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
|
||||
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
@@ -936,8 +936,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
|
||||
{
|
||||
long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
|
||||
long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
@@ -959,10 +959,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
|
||||
correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
|
||||
long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
|
||||
long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
|
||||
err2 = Ulp_Error_Double( test, correct2 );
|
||||
err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Ulp_Error_Double( test, correct5 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
|
||||
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
@@ -988,8 +988,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
|
||||
{
|
||||
long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
|
||||
long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1790,7 +1790,7 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
|
||||
// *signgamp = 1;
|
||||
ix = hx&0x7fffffff;
|
||||
if(ix>=0x7ff00000) return x*x;
|
||||
if((ix|lx)==0) return one/zero;
|
||||
if((ix|lx)==0) return INFINITY;
|
||||
if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */
|
||||
if(hx<0) {
|
||||
// *signgamp = -1;
|
||||
@@ -1799,9 +1799,9 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
|
||||
}
|
||||
if(hx<0) {
|
||||
if(ix>=0x43300000) /* |x|>=2**52, must be -integer */
|
||||
return one/zero;
|
||||
return INFINITY;
|
||||
t = reference_sinpi(x);
|
||||
if(t==zero) return one/zero; /* -integer */
|
||||
if(t==zero) return INFINITY; /* -integer */
|
||||
nadj = reference_log(pi/reference_fabs(t*x));
|
||||
// if(t<zero) *signgamp = -1;
|
||||
x = -x;
|
||||
|
||||
0
test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
Normal file → Executable file
0
test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
Normal file → Executable file
@@ -1010,7 +1010,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
|
||||
{
|
||||
double test = ((double*) q)[j];
|
||||
long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
float err = Bruteforce_Ulp_Error_Double( test, correct );
|
||||
int fail = ! (fabsf(err) <= f->double_ulps);
|
||||
|
||||
if( fail && ftz )
|
||||
@@ -1028,8 +1028,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
|
||||
{ // look at me,
|
||||
long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
|
||||
long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
@@ -1051,10 +1051,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
|
||||
correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
|
||||
long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
|
||||
long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
|
||||
err2 = Ulp_Error_Double( test, correct2 );
|
||||
err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Ulp_Error_Double( test, correct5 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
|
||||
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
@@ -1085,14 +1085,14 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
|
||||
long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
|
||||
long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
|
||||
long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
|
||||
err2 = Ulp_Error_Double( test, correct2 );
|
||||
err3 = Ulp_Error_Double( test, correct3 );
|
||||
err4 = Ulp_Error_Double( test, correct4 );
|
||||
err5 = Ulp_Error_Double( test, correct5 );
|
||||
float err6 = Ulp_Error_Double( test, correct6 );
|
||||
float err7 = Ulp_Error_Double( test, correct7 );
|
||||
float err8 = Ulp_Error_Double( test, correct8 );
|
||||
float err9 = Ulp_Error_Double( test, correct9 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
|
||||
float err6 = Bruteforce_Ulp_Error_Double( test, correct6 );
|
||||
float err7 = Bruteforce_Ulp_Error_Double( test, correct7 );
|
||||
float err8 = Bruteforce_Ulp_Error_Double( test, correct8 );
|
||||
float err9 = Bruteforce_Ulp_Error_Double( test, correct9 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
|
||||
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
|
||||
(!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
|
||||
@@ -1132,10 +1132,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
|
||||
correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
|
||||
long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 );
|
||||
long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
|
||||
err2 = Ulp_Error_Double( test, correct2 );
|
||||
err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Ulp_Error_Double( test, correct5 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
|
||||
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
@@ -1161,8 +1161,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
|
||||
{
|
||||
long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
|
||||
long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
@@ -1184,10 +1184,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
|
||||
correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
|
||||
long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
|
||||
long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
|
||||
err2 = Ulp_Error_Double( test, correct2 );
|
||||
err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Ulp_Error_Double( test, correct5 );
|
||||
err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
|
||||
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
@@ -1213,8 +1213,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
|
||||
{
|
||||
long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
|
||||
long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
|
||||
@@ -200,6 +200,7 @@ typedef struct TestInfo
|
||||
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
|
||||
ThreadInfo *tinfo; // An array of thread specific information for each worker thread
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
float ulps; // max_allowed ulps
|
||||
@@ -234,6 +235,16 @@ int TestFunc_Float_Float(const Func *f, MTdata d)
|
||||
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
|
||||
}
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
@@ -309,7 +320,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d)
|
||||
|
||||
if( !gSkipCorrectnessTesting || skipTestingRelaxed)
|
||||
{
|
||||
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for( i = 0; i < test_info.threadCount; i++ )
|
||||
@@ -892,7 +903,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
cl_double test = ((cl_double*) q)[j];
|
||||
long double correct = func.f_f( s[j] );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
float err = Bruteforce_Ulp_Error_Double( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
if( fail )
|
||||
@@ -912,8 +923,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
long double correct2 = func.f_f( 0.0L );
|
||||
long double correct3 = func.f_f( -0.0L );
|
||||
float err2 = Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Ulp_Error_Double( test, correct3 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
|
||||
if( fabsf( err2 ) < fabsf(err ) )
|
||||
err = err2;
|
||||
@@ -997,7 +1008,16 @@ int TestFunc_Double_Double(const Func *f, MTdata d)
|
||||
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
|
||||
}
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
//there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
@@ -1062,7 +1082,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d)
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for( i = 0; i < test_info.threadCount; i++ )
|
||||
|
||||
@@ -800,8 +800,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d)
|
||||
double test2 = ((double*) q2)[j];
|
||||
long double correct2;
|
||||
long double correct = f->dfunc.f_fpf( s[j], &correct2 );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
float err2 = Ulp_Error_Double( test2, correct2 );
|
||||
float err = Bruteforce_Ulp_Error_Double( test, correct );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test2, correct2 );
|
||||
int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps);
|
||||
if( ftz )
|
||||
{
|
||||
@@ -837,10 +837,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d)
|
||||
long double correct2p, correct2n;
|
||||
long double correctp = f->dfunc.f_fpf( 0.0, &correct2p );
|
||||
long double correctn = f->dfunc.f_fpf( -0.0, &correct2n );
|
||||
float errp = Ulp_Error_Double( test, correctp );
|
||||
float err2p = Ulp_Error_Double( test, correct2p );
|
||||
float errn = Ulp_Error_Double( test, correctn );
|
||||
float err2n = Ulp_Error_Double( test, correct2n );
|
||||
float errp = Bruteforce_Ulp_Error_Double( test, correctp );
|
||||
float err2p = Bruteforce_Ulp_Error_Double( test, correct2p );
|
||||
float errn = Bruteforce_Ulp_Error_Double( test, correctn );
|
||||
float err2n = Bruteforce_Ulp_Error_Double( test, correct2n );
|
||||
fail = fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps)) &&
|
||||
((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) );
|
||||
if( fabsf( errp ) < fabsf(err ) )
|
||||
|
||||
@@ -633,7 +633,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d)
|
||||
double test = ((double*) q)[j];
|
||||
int correct2 = INT_MIN;
|
||||
long double correct = f->dfunc.f_fpI( s[j], &correct2 );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
float err = Bruteforce_Ulp_Error_Double( test, correct );
|
||||
cl_long iErr = (long long) q2[j] - (long long) correct2;
|
||||
int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError );
|
||||
if( ftz )
|
||||
@@ -652,8 +652,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d)
|
||||
int correct5, correct6;
|
||||
long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 );
|
||||
long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 );
|
||||
float err2 = Ulp_Error_Double( test, correct3 );
|
||||
float err3 = Ulp_Error_Double( test, correct4 );
|
||||
float err2 = Bruteforce_Ulp_Error_Double( test, correct3 );
|
||||
float err3 = Bruteforce_Ulp_Error_Double( test, correct4 );
|
||||
cl_long iErr2 = (long long) q2[j] - (long long) correct5;
|
||||
cl_long iErr3 = (long long) q2[j] - (long long) correct6;
|
||||
|
||||
|
||||
@@ -567,7 +567,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d)
|
||||
{
|
||||
double test = ((double*) q)[j];
|
||||
long double correct = f->dfunc.f_u( s[j] );
|
||||
float err = Ulp_Error_Double(test, correct);
|
||||
float err = Bruteforce_Ulp_Error_Double(test, correct);
|
||||
int fail = ! (fabsf(err) <= f->double_ulps);
|
||||
|
||||
// half_sin/cos/tan are only valid between +-2**16, Inf, NaN
|
||||
|
||||
Reference in New Issue
Block a user