diff --git a/test_conformance/math_brute_force/FunctionList.h b/test_conformance/math_brute_force/FunctionList.h index b2b0ec07..460e7e82 100644 --- a/test_conformance/math_brute_force/FunctionList.h +++ b/test_conformance/math_brute_force/FunctionList.h @@ -68,8 +68,10 @@ struct Func; typedef struct vtbl { const char *type_name; - int (*TestFunc)( const struct Func *, MTdata ); - int (*DoubleTestFunc)( const struct Func *, MTdata); // may be NULL if function is single precision only + int (*TestFunc)(const struct Func *, MTdata, bool); + int (*DoubleTestFunc)( + const struct Func *, MTdata, + bool); // may be NULL if function is single precision only }vtbl; typedef struct Func diff --git a/test_conformance/math_brute_force/Utility.h b/test_conformance/math_brute_force/Utility.h index 9c14910b..02bb4b9f 100644 --- a/test_conformance/math_brute_force/Utility.h +++ b/test_conformance/math_brute_force/Utility.h @@ -64,7 +64,6 @@ extern int gSkipCorrectnessTesting; extern int gMeasureTimes; extern int gReportAverageTimes; extern int gForceFTZ; -extern volatile int gTestFastRelaxed; extern int gFastRelaxedDerived; extern int gWimpyMode; extern int gHasDouble; @@ -97,8 +96,11 @@ float Bruteforce_Ulp_Error_Double( double test, long double reference ); uint64_t GetTime( void ); double SubtractTime( uint64_t endTime, uint64_t startTime ); -int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p ); -int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k, + cl_program *p, bool relaxedMode); +int MakeKernels(const char **c, cl_uint count, const char *name, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode); // used to convert a bucket of bits into a search pattern through double static inline double DoubleFromUInt32( uint32_t bits ); diff --git a/test_conformance/math_brute_force/binary.cpp b/test_conformance/math_brute_force/binary.cpp index 4155a411..f53d2f12 100644 --- a/test_conformance/math_brute_force/binary.cpp +++ b/test_conformance/math_brute_force/binary.cpp @@ -18,12 +18,16 @@ #include #include "FunctionList.h" -int TestFunc_Float_Float_Float(const Func *f, MTdata); -int TestFunc_Double_Double_Double(const Func *f, MTdata); -int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata); -int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata); -int TestFunc_Float_Float_Float_common(const Func *f, MTdata, int isNextafter); -int TestFunc_Double_Double_Double_common(const Func *f, MTdata, int isNextafter); +int TestFunc_Float_Float_Float(const Func *f, MTdata, bool relaxedMode); +int TestFunc_Double_Double_Double(const Func *f, MTdata, bool relaxedMode); +int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata, + bool relaxedMode); +int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata, + bool relaxedMode); +int TestFunc_Float_Float_Float_common(const Func *f, MTdata, int isNextafter, + bool relaxedMode); +int TestFunc_Double_Double_Double_common(const Func *f, MTdata, int isNextafter, + bool relaxedMode); const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); @@ -36,9 +40,11 @@ extern const vtbl _binary_nextafter = { TestFunc_Double_Double_Double_nextafter }; -static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode); -static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n" "{\n" @@ -98,10 +104,13 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n" @@ -163,7 +172,8 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_c char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } // A table of more difficult cases to get right @@ -192,6 +202,7 @@ typedef struct BuildKernelInfo cl_kernel **kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -199,7 +210,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -207,7 +219,9 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, + info->relaxedMode); } //Thread specific data for a worker thread @@ -240,11 +254,14 @@ typedef struct TestInfo int isFDim; int skipNanInf; int isNextafter; -}TestInfo; + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. +} TestInfo; static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); -int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter) +int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter, + bool relaxedMode) { TestInfo test_info; cl_int error; @@ -254,7 +271,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter) double maxErrorVal2 = 0.0; int skipTestingRelaxed = 0; - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); @@ -284,6 +301,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter) test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode ); test_info.skipNanInf = test_info.isFDim && ! gInfNanSupport; test_info.isNextafter = isNextafter; + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for every thread for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) { @@ -342,7 +360,10 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } @@ -482,6 +503,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) float ulps = job->ulps; fptr func = job->f->func; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_uint j, k; cl_int error; @@ -496,7 +518,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) RoundingMode oldRoundMode; int skipVerification = 0; - if(gTestFastRelaxed) + if (relaxedMode) { if (strcmp(name,"pow")==0 && gFastRelaxedDerived) { @@ -710,7 +732,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables // -cl-finite-math-only optimization. This optimization allows to assume that arguments and // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. - if ( gTestFastRelaxed || skipNanInf) + if (relaxedMode || skipNanInf) { if( skipNanInf && overflow[j]) continue; @@ -772,7 +794,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables // -cl-finite-math-only optimization. This optimization allows to assume that arguments and // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. - if( gTestFastRelaxed || skipNanInf ) + if (relaxedMode || skipNanInf) { if( fetestexcept(FE_OVERFLOW) && skipNanInf ) continue; @@ -817,7 +839,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables // -cl-finite-math-only optimization. This optimization allows to assume that arguments and // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. - if( gTestFastRelaxed || skipNanInf ) + if (relaxedMode || skipNanInf) { if( fetestexcept(FE_OVERFLOW) && skipNanInf ) continue; @@ -870,7 +892,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables // -cl-finite-math-only optimization. This optimization allows to assume that arguments and // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. - if ( gTestFastRelaxed || skipNanInf ) + if (relaxedMode || skipNanInf) { // Note: no double rounding here. Reference functions calculate in single precision. if( overflow[j] && skipNanInf) @@ -977,7 +999,8 @@ static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); -int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafter) +int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, + int isNextafter, bool relaxedMode) { TestInfo test_info; cl_int error; @@ -986,7 +1009,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte double maxErrorVal = 0.0; double maxErrorVal2 = 0.0; - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); @@ -1075,7 +1098,10 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } @@ -1534,23 +1560,25 @@ exit: } -int TestFunc_Float_Float_Float(const Func *f, MTdata d) +int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) { - return TestFunc_Float_Float_Float_common(f, d, 0); + return TestFunc_Float_Float_Float_common(f, d, 0, relaxedMode); } -int TestFunc_Double_Double_Double(const Func *f, MTdata d) +int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) { - return TestFunc_Double_Double_Double_common(f, d, 0); + return TestFunc_Double_Double_Double_common(f, d, 0, relaxedMode); } -int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata d) +int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata d, + bool relaxedMode) { - return TestFunc_Float_Float_Float_common(f, d, 1); + return TestFunc_Float_Float_Float_common(f, d, 1, relaxedMode); } -int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata d) +int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata d, + bool relaxedMode) { - return TestFunc_Double_Double_Double_common(f, d, 1); + return TestFunc_Double_Double_Double_common(f, d, 1, relaxedMode); } diff --git a/test_conformance/math_brute_force/binaryOperator.cpp b/test_conformance/math_brute_force/binaryOperator.cpp index 7676625c..96537f2f 100644 --- a/test_conformance/math_brute_force/binaryOperator.cpp +++ b/test_conformance/math_brute_force/binaryOperator.cpp @@ -18,17 +18,25 @@ #include #include "FunctionList.h" -int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata); -int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata); +int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata, + bool relaxedMode); +int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata, + bool relaxedMode); extern const vtbl _binary_operator = { "binaryOperator", TestFunc_Float_Float_Float_Operator, TestFunc_Double_Double_Double_Operator }; -static int BuildKernel( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +static int BuildKernel(const char *name, const char *operator_symbol, + int vectorSize, cl_uint kernel_count, cl_kernel *k, + cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, const char *operator_symbol, + int vectorSize, cl_uint kernel_count, cl_kernel *k, + cl_program *p, bool relaxedMode); -static int BuildKernel( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, const char *operator_symbol, + int vectorSize, cl_uint kernel_count, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n" @@ -88,11 +96,13 @@ static int BuildKernel( const char *name, const char *operator_symbol, int vecto char testName[32]; snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); - + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } -static int BuildKernelDouble( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, const char *operator_symbol, + int vectorSize, cl_uint kernel_count, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", @@ -154,8 +164,8 @@ static int BuildKernelDouble( const char *name, const char *operator_symbol, int char testName[32]; snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); - + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } typedef struct BuildKernelInfo @@ -166,6 +176,7 @@ typedef struct BuildKernelInfo cl_program *programs; const char *name; const char *operator_symbol; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -173,7 +184,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->name, info->operator_symbol, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernel(info->name, info->operator_symbol, i, info->kernel_count, + info->kernels[i], info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -181,7 +193,9 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->name, info->operator_symbol, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernelDouble(info->name, info->operator_symbol, i, + info->kernel_count, info->kernels[i], + info->programs + i, info->relaxedMode); } //Thread specific data for a worker thread @@ -210,6 +224,8 @@ typedef struct TestInfo cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if the test is being run in relaxed mode, false + // otherwise. // no special fields }TestInfo; @@ -237,7 +253,8 @@ static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( s static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); -int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d) +int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, + bool relaxedMode) { TestInfo test_info; cl_int error; @@ -246,7 +263,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d) double maxErrorVal = 0.0; double maxErrorVal2 = 0.0; - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); @@ -331,7 +348,13 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, + test_info.threadCount, + test_info.k, + test_info.programs, + f->name, + f->nameInCode, + relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } @@ -460,7 +483,7 @@ exit: return error; } -static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) +static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *) data; size_t buffer_elements = job->subBufferSize; @@ -469,9 +492,10 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) ThreadInfo *tinfo = job->tinfo + thread_id; float ulps = job->ulps; fptr func = job->f->func; - if ( gTestFastRelaxed ) + bool relaxedMode = job->relaxedMode; + if (relaxedMode) { - func = job->f->rfunc; + func = job->f->rfunc; } @@ -528,7 +552,8 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) if (y >= specialValuesFloatCount) break; } - if (gTestFastRelaxed && strcmp(name,"divide") == 0) { + if (relaxedMode && strcmp(name, "divide") == 0) + { cl_uint pj = p[j] & 0x7fffffff; cl_uint p2j = p2[j] & 0x7fffffff; // Replace values outside [2^-62, 2^62] with QNaN @@ -546,7 +571,8 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) p[j] = genrand_int32(d); p2[j] = genrand_int32(d); - if (gTestFastRelaxed && strcmp(name,"divide") == 0) { + if (relaxedMode && strcmp(name, "divide") == 0) + { cl_uint pj = p[j] & 0x7fffffff; cl_uint p2j = p2[j] & 0x7fffffff; // Replace values outside [2^-62, 2^62] with QNaN @@ -704,8 +730,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) float err = Ulp_Error( test, correct ); float errB = Ulp_Error( test, (float) correct ); - if( gTestFastRelaxed ) - ulps = job->f->relaxed_error; + if (relaxedMode) ulps = job->f->relaxed_error; int fail = ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps))); if( fabsf( errB ) < fabsf(err ) ) @@ -898,7 +923,6 @@ exit: if( overflow ) free( overflow ); return error; - } @@ -925,7 +949,8 @@ static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); -int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d) +int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, + bool relaxedMode) { TestInfo test_info; cl_int error; @@ -933,7 +958,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d) float maxError = 0.0f; double maxErrorVal = 0.0; double maxErrorVal2 = 0.0; - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); @@ -1020,7 +1045,13 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, + test_info.threadCount, + test_info.k, + test_info.programs, + f->name, + f->nameInCode, + relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } @@ -1162,6 +1193,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_uint j, k; cl_int error; diff --git a/test_conformance/math_brute_force/binary_i.cpp b/test_conformance/math_brute_force/binary_i.cpp index a29a876d..b7386a52 100644 --- a/test_conformance/math_brute_force/binary_i.cpp +++ b/test_conformance/math_brute_force/binary_i.cpp @@ -19,16 +19,20 @@ #include #include "FunctionList.h" -int TestFunc_Float_Float_Int(const Func *f, MTdata); -int TestFunc_Double_Double_Int(const Func *f, MTdata); +int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode); +int TestFunc_Double_Double_Int(const Func *f, MTdata, bool relaxedMode); extern const vtbl _binary_i = { "binary_i", TestFunc_Float_Float_Int, TestFunc_Double_Double_Int }; -static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode); -static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n" "{\n" @@ -89,10 +93,13 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n" @@ -155,7 +162,8 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_c char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } typedef struct BuildKernelInfo @@ -165,6 +173,7 @@ typedef struct BuildKernelInfo cl_kernel **kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -172,7 +181,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -180,7 +190,9 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, + info->relaxedMode); } @@ -239,7 +251,7 @@ typedef struct TestInfo static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); -int TestFunc_Float_Float_Int(const Func *f, MTdata d) +int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) { TestInfo test_info; cl_int error; @@ -248,7 +260,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d) double maxErrorVal = 0.0; cl_int maxErrorVal2 = 0; - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); @@ -333,7 +345,10 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } @@ -745,7 +760,7 @@ static size_t specialValuesInt2Count = sizeof( specialValuesInt ) / sizeof( spec static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); -int TestFunc_Double_Double_Int(const Func *f, MTdata d) +int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) { TestInfo test_info; cl_int error; @@ -754,7 +769,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d) double maxErrorVal = 0.0; cl_int maxErrorVal2 = 0; - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); @@ -843,7 +858,10 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } diff --git a/test_conformance/math_brute_force/binary_two_results_i.cpp b/test_conformance/math_brute_force/binary_two_results_i.cpp index 91cebf56..c5577b9e 100644 --- a/test_conformance/math_brute_force/binary_two_results_i.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i.cpp @@ -21,17 +21,20 @@ #define PARALLEL_REFERENCE -int TestFunc_FloatI_Float_Float(const Func *f, MTdata); -int TestFunc_DoubleI_Double_Double(const Func *f, MTdata); +int TestFunc_FloatI_Float_Float(const Func *f, MTdata, bool relaxedMode); +int TestFunc_DoubleI_Double_Double(const Func *f, MTdata, bool relaxedMode); extern const vtbl _binary_two_results_i = { "binary_two_results_i", TestFunc_FloatI_Float_Float, TestFunc_DoubleI_Double_Double }; -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2)\n" "{\n" @@ -96,10 +99,11 @@ static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_progr char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2)\n" @@ -166,7 +170,7 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } typedef struct BuildKernelInfo @@ -175,6 +179,7 @@ typedef struct BuildKernelInfo cl_kernel *kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -182,7 +187,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -190,7 +196,8 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } #if defined PARALLEL_REFERENCE @@ -266,7 +273,7 @@ ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) #endif -int TestFunc_FloatI_Float_Float(const Func *f, MTdata d) +int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -285,7 +292,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d) #if defined PARALLEL_REFERENCE cl_uint threadCount = GetThreadCount(); #endif - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); if(gWimpyMode ){ step = (1ULL<<32) * gWimpyReductionFactor / (512); @@ -300,7 +307,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) return error; } @@ -695,7 +703,7 @@ exit: return error; } -int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d) +int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -710,7 +718,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d) size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; uint64_t step = bufferSize / sizeof( double ); - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); if(gWimpyMode ){ step = (1ULL<<32) * gWimpyReductionFactor / (512); } @@ -725,7 +733,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) diff --git a/test_conformance/math_brute_force/i_unary.cpp b/test_conformance/math_brute_force/i_unary.cpp index 75b9424a..379d8e35 100644 --- a/test_conformance/math_brute_force/i_unary.cpp +++ b/test_conformance/math_brute_force/i_unary.cpp @@ -18,17 +18,20 @@ #include #include "FunctionList.h" -int TestFunc_Int_Float(const Func *f, MTdata); -int TestFunc_Int_Double(const Func *f, MTdata); +int TestFunc_Int_Float(const Func *f, MTdata, bool relaxedMode); +int TestFunc_Int_Double(const Func *f, MTdata, bool relaxedMode); extern const vtbl _i_unary = { "i_unary", TestFunc_Int_Float, TestFunc_Int_Double }; -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n" "{\n" @@ -85,10 +88,11 @@ static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_progr char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n" @@ -148,7 +152,7 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } typedef struct BuildKernelInfo @@ -157,6 +161,7 @@ typedef struct BuildKernelInfo cl_kernel *kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -164,7 +169,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -172,10 +178,11 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } -int TestFunc_Int_Float(const Func *f, MTdata d) +int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -187,7 +194,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d) uint64_t step = bufferSize / sizeof( float ); int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1); - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); if( gWimpyMode ) { step = (1ULL<<32) * gWimpyReductionFactor / (512); @@ -201,7 +208,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d) Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) return error; /* @@ -395,7 +403,7 @@ exit: return error; } -int TestFunc_Int_Double(const Func *f, MTdata d) +int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -407,7 +415,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d) uint64_t step = bufferSize / sizeof( cl_double ); int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1); - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); if( gWimpyMode ) { step = (1ULL<<32) * gWimpyReductionFactor / (512); @@ -420,7 +428,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d) Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) @@ -623,5 +632,3 @@ exit: return error; } - - diff --git a/test_conformance/math_brute_force/macro_binary.cpp b/test_conformance/math_brute_force/macro_binary.cpp index 0670990f..b590f50a 100644 --- a/test_conformance/math_brute_force/macro_binary.cpp +++ b/test_conformance/math_brute_force/macro_binary.cpp @@ -18,16 +18,19 @@ #include #include "FunctionList.h" -int TestMacro_Int_Float_Float(const Func *f, MTdata); -int TestMacro_Int_Double_Double(const Func *f, MTdata); +int TestMacro_Int_Float_Float(const Func *f, MTdata, bool relaxedMode); +int TestMacro_Int_Double_Double(const Func *f, MTdata, bool relaxedMode); extern const vtbl _macro_binary = { "macro_binary", TestMacro_Int_Float_Float, TestMacro_Int_Double_Double }; static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble(const char *name, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode); -static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n" "{\n" @@ -88,10 +91,14 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); } + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); +} -static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n" @@ -154,7 +161,8 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_c char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } typedef struct BuildKernelInfo @@ -164,6 +172,7 @@ typedef struct BuildKernelInfo cl_kernel **kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -171,7 +180,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -179,7 +189,9 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, + info->relaxedMode); } @@ -229,13 +241,13 @@ typedef struct TestInfo static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); -int TestMacro_Int_Float_Float(const Func *f, MTdata d) +int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) { TestInfo test_info; cl_int error; size_t i, j; - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); @@ -319,7 +331,10 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } @@ -738,13 +753,13 @@ static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); -int TestMacro_Int_Double_Double(const Func *f, MTdata d) +int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) { TestInfo test_info; cl_int error; size_t i, j; - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); @@ -832,7 +847,10 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } diff --git a/test_conformance/math_brute_force/macro_unary.cpp b/test_conformance/math_brute_force/macro_unary.cpp index c8d125b5..872007f1 100644 --- a/test_conformance/math_brute_force/macro_unary.cpp +++ b/test_conformance/math_brute_force/macro_unary.cpp @@ -18,16 +18,20 @@ #include #include "FunctionList.h" -int TestMacro_Int_Float(const Func *f, MTdata); -int TestMacro_Int_Double(const Func *f, MTdata); +int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode); +int TestMacro_Int_Double(const Func *f, MTdata, bool relaxedMode); extern const vtbl _macro_unary = { "macro_unary", TestMacro_Int_Float, TestMacro_Int_Double }; -static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode); -static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n" "{\n" @@ -84,10 +88,13 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n" @@ -147,7 +154,8 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_c char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } typedef struct BuildKernelInfo @@ -157,6 +165,7 @@ typedef struct BuildKernelInfo cl_kernel **kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -164,7 +173,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -172,7 +182,9 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, + info->relaxedMode); } //Thread specific data for a worker thread @@ -200,13 +212,13 @@ typedef struct TestInfo static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); -int TestMacro_Int_Float(const Func *f, MTdata d) +int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) { TestInfo test_info; cl_int error; size_t i, j; - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); @@ -281,7 +293,10 @@ int TestMacro_Int_Float(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } @@ -597,13 +612,13 @@ exit: static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ); -int TestMacro_Int_Double(const Func *f, MTdata d) +int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) { TestInfo test_info; cl_int error; size_t i, j; - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); test_info.threadCount = GetThreadCount(); @@ -681,7 +696,10 @@ int TestMacro_Int_Double(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } diff --git a/test_conformance/math_brute_force/mad.cpp b/test_conformance/math_brute_force/mad.cpp index 5eeae35a..0737afbc 100644 --- a/test_conformance/math_brute_force/mad.cpp +++ b/test_conformance/math_brute_force/mad.cpp @@ -18,15 +18,18 @@ #include #include "FunctionList.h" -int TestFunc_mad(const Func *f, MTdata); -int TestFunc_mad_Double(const Func *f, MTdata); +int TestFunc_mad(const Func *f, MTdata, bool relaxedMode); +int TestFunc_mad_Double(const Func *f, MTdata, bool relaxedMode); extern const vtbl _mad_tbl = { "ternary", TestFunc_mad, TestFunc_mad_Double }; -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2, __global float", sizeNames[vectorSize], "* in3 )\n" @@ -89,10 +92,11 @@ static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_progr char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", @@ -157,7 +161,7 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } typedef struct BuildKernelInfo @@ -166,6 +170,7 @@ typedef struct BuildKernelInfo cl_kernel *kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -173,7 +178,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -181,16 +187,17 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } -int TestFunc_mad(const Func *f, MTdata d) +int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; int error; - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); cl_program programs[ VECTOR_SIZE_COUNT ]; cl_kernel kernels[ VECTOR_SIZE_COUNT ]; @@ -207,7 +214,8 @@ int TestFunc_mad(const Func *f, MTdata d) step = (1ULL<<32) * gWimpyReductionFactor / (512); } // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) return error; /* @@ -652,7 +660,7 @@ exit: return error; } -int TestFunc_mad_Double(const Func *f, MTdata d) +int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -666,14 +674,15 @@ int TestFunc_mad_Double(const Func *f, MTdata d) double maxErrorVal3 = 0.0f; size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); uint64_t step = bufferSize / sizeof( double ); if( gWimpyMode ) { step = (1ULL<<32) * gWimpyReductionFactor / (512); } // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 1e33b95a..39e9a78e 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -68,8 +68,9 @@ int gForceFTZ = 0; int gWimpyMode = 0; int gHasDouble = 0; int gTestFloat = 1; -//This flag should be 'ON' by default and it can be changed through the command line arguments. -volatile int gTestFastRelaxed = 1; +// This flag should be 'ON' by default and it can be changed through the command +// line arguments. +static int gTestFastRelaxed = 1; /*This flag corresponds to defining if the implementation has Derived Fast Relaxed functions. The spec does not specify ULP for derived function. The derived functions are composed of base functions which are tested for ULP, thus when this flag is enabled, Derived functions will not be tested for ULP, as per table 7.1 of OpenCL 2.0 spec. @@ -179,7 +180,9 @@ int doTest( const char* name ) { gTestCount++; vlog( "%3d: ", gTestCount ); - if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata ) ) + // Test with relaxed requirements here. + if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata, + true /* relaxed mode */)) { gFailCount++; error++; @@ -194,47 +197,38 @@ int doTest( const char* name ) if( gTestFloat ) { - int testFastRelaxedTmp = gTestFastRelaxed; - gTestFastRelaxed = 0; - gTestCount++; vlog( "%3d: ", gTestCount ); - if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata ) ) + // Don't test with relaxed requirements. + if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata, + false /* relaxed mode */)) { gFailCount++; error++; if( gStopOnError ) { - gTestFastRelaxed = testFastRelaxedTmp; gSkipRestOfTests = true; return error; } } - gTestFastRelaxed = testFastRelaxedTmp; } if( gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p ) { - //Disable fast-relaxed-math for double precision floating-point - int testFastRelaxedTmp = gTestFastRelaxed; - gTestFastRelaxed = 0; - gTestCount++; vlog( "%3d: ", gTestCount ); - if( func_data->vtbl_ptr->DoubleTestFunc( func_data, gMTdata ) ) + // Don't test with relaxed requirements. + if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata, + false /* relaxed mode*/)) { gFailCount++; error++; if( gStopOnError ) { - gTestFastRelaxed = testFastRelaxedTmp; gSkipRestOfTests = true; return error; } } - - //Re-enable testing fast-relaxed-math mode - gTestFastRelaxed = testFastRelaxedTmp; } } @@ -1490,7 +1484,8 @@ int IsTininessDetectedBeforeRounding( void ) } -int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p ) +int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k, + cl_program *p, bool relaxedMode) { int error = 0; char options[200] = ""; @@ -1500,7 +1495,7 @@ int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, c strcat(options," -cl-denorms-are-zero"); } - if( gTestFastRelaxed ) + if (relaxedMode) { strcat(options, " -cl-fast-relaxed-math"); } @@ -1527,7 +1522,9 @@ int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, c return error; } -int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +int MakeKernels(const char **c, cl_uint count, const char *name, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { int error = 0; cl_uint i; @@ -1543,7 +1540,7 @@ int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel strcat(options," -cl-fp32-correctly-rounded-divide-sqrt "); } - if( gTestFastRelaxed ) + if (relaxedMode) { strcat(options, " -cl-fast-relaxed-math"); } diff --git a/test_conformance/math_brute_force/ternary.cpp b/test_conformance/math_brute_force/ternary.cpp index 1bd7d889..2c4b503e 100644 --- a/test_conformance/math_brute_force/ternary.cpp +++ b/test_conformance/math_brute_force/ternary.cpp @@ -21,15 +21,19 @@ #define CORRECTLY_ROUNDED 0 #define FLUSHED 1 -int TestFunc_Float_Float_Float_Float(const Func *f, MTdata); -int TestFunc_Double_Double_Double_Double(const Func *f, MTdata); +int TestFunc_Float_Float_Float_Float(const Func *f, MTdata, bool relaxedMode); +int TestFunc_Double_Double_Double_Double(const Func *f, MTdata, + bool relaxedMode); extern const vtbl _ternary = { "ternary", TestFunc_Float_Float_Float_Float, TestFunc_Double_Double_Double_Double }; -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2, __global float", sizeNames[vectorSize], "* in3 )\n" @@ -93,11 +97,11 @@ static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_progr char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); - + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", @@ -163,8 +167,7 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); - + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } typedef struct BuildKernelInfo @@ -173,6 +176,7 @@ typedef struct BuildKernelInfo cl_kernel *kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -180,7 +184,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -188,7 +193,8 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } @@ -208,7 +214,7 @@ static const float specialValuesFloat[] = { static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); -int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d) +int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -227,7 +233,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d) cl_uchar overflow[BUFFER_SIZE / sizeof( float )]; float float_ulps; - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); if( gWimpyMode ) { step = (1ULL<<32) * gWimpyReductionFactor / (512); @@ -239,7 +245,8 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d) float_ulps = f->float_ulps; // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) return error; /* @@ -851,7 +858,8 @@ static const double specialValuesDouble[] = { static const size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); -int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) +int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, + bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -863,7 +871,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) double maxErrorVal = 0.0f; double maxErrorVal2 = 0.0f; double maxErrorVal3 = 0.0f; - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; uint64_t step = bufferSize / sizeof( double ); @@ -875,7 +883,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) diff --git a/test_conformance/math_brute_force/unary.cpp b/test_conformance/math_brute_force/unary.cpp index a979d07c..7a98cd1f 100644 --- a/test_conformance/math_brute_force/unary.cpp +++ b/test_conformance/math_brute_force/unary.cpp @@ -22,16 +22,20 @@ #include #endif -int TestFunc_Float_Float(const Func *f, MTdata); -int TestFunc_Double_Double(const Func *f, MTdata); +int TestFunc_Float_Float(const Func *f, MTdata, bool relaxedMode); +int TestFunc_Double_Double(const Func *f, MTdata, bool relaxedMode); extern const vtbl _unary = { "unary", TestFunc_Float_Float, TestFunc_Double_Double }; -static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode); -static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n" @@ -89,10 +93,13 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n" @@ -152,7 +159,8 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_c char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, + relaxedMode); } typedef struct BuildKernelInfo @@ -162,6 +170,7 @@ typedef struct BuildKernelInfo cl_kernel **kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -169,7 +178,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -177,7 +187,9 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, + info->relaxedMode); } //Thread specific data for a worker thread @@ -206,20 +218,22 @@ typedef struct TestInfo int isRangeLimited; // 1 if the function is only to be evaluated over a range float half_sin_cos_tan_limit; + bool relaxedMode; // True if test is to be run in relaxed mode, false + // otherwise. }TestInfo; static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); -int TestFunc_Float_Float(const Func *f, MTdata d) +int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) { TestInfo test_info; cl_int error; size_t i, j; float maxError = 0.0f; double maxErrorVal = 0.0; - int skipTestingRelaxed = ( gTestFastRelaxed && strcmp(f->name,"tan") == 0 ); + int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0); - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); @@ -311,7 +325,10 @@ int TestFunc_Float_Float(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } @@ -448,7 +465,8 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) float ulps = job->ulps; fptr func = job->f->func; const char * fname = job->f->name; - if ( gTestFastRelaxed ) + bool relaxedMode = job->relaxedMode; + if (relaxedMode) { ulps = job->f->relaxed_error; func = job->f->rfunc; @@ -483,7 +501,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) for( j = 0; j < buffer_elements; j++ ) { p[j] = base + j * scale; - if( gTestFastRelaxed ) + if (relaxedMode) { float p_j = *(float *) &p[j]; if ( strcmp(fname,"sin")==0 || strcmp(fname,"cos")==0 ) //the domain of the function is [-pi,pi] @@ -600,7 +618,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) { fail = 0; } - else if( gTestFastRelaxed ) + else if (relaxedMode) { if ( strcmp(fname,"sin")==0 || strcmp(fname,"cos")==0 ) { @@ -680,7 +698,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) typedef int (*CheckForSubnormal) (double,float); // If we are in fast relaxed math, we have a different calculation for the subnormal threshold. CheckForSubnormal isFloatResultSubnormalPtr; - if ( gTestFastRelaxed ) + if (relaxedMode) { isFloatResultSubnormalPtr = &IsFloatResultSubnormalAbsError; } @@ -981,7 +999,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) return CL_SUCCESS; } -int TestFunc_Double_Double(const Func *f, MTdata d) +int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) { TestInfo test_info; cl_int error; @@ -995,7 +1013,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d) double end_time; #endif - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info memset( &test_info, 0, sizeof( test_info ) ); test_info.threadCount = GetThreadCount(); @@ -1020,6 +1038,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d) test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for every thread for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) @@ -1073,7 +1092,10 @@ int TestFunc_Double_Double(const Func *f, MTdata d) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) goto exit; } diff --git a/test_conformance/math_brute_force/unary_two_results.cpp b/test_conformance/math_brute_force/unary_two_results.cpp index a2197413..779681b1 100644 --- a/test_conformance/math_brute_force/unary_two_results.cpp +++ b/test_conformance/math_brute_force/unary_two_results.cpp @@ -18,16 +18,19 @@ #include #include "FunctionList.h" -int TestFunc_Float2_Float(const Func *f, MTdata); -int TestFunc_Double2_Double(const Func *f, MTdata); +int TestFunc_Float2_Float(const Func *f, MTdata, bool relaxedMode); +int TestFunc_Double2_Double(const Func *f, MTdata, bool relaxedMode); extern const vtbl _unary_two_results = { "unary_two_results", TestFunc_Float2_Float, TestFunc_Double2_Double }; -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n" "{\n" @@ -88,11 +91,11 @@ static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_progr char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); - + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n" @@ -155,8 +158,7 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); - + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } typedef struct BuildKernelInfo @@ -165,6 +167,7 @@ typedef struct BuildKernelInfo cl_kernel *kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -172,7 +175,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -180,10 +184,11 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } -int TestFunc_Float2_Float(const Func *f, MTdata d) +int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -205,7 +210,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d) int skipNanInf = isFract && ! gInfNanSupport; float float_ulps; - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); if( gWimpyMode ) { step = (1ULL<<32) * gWimpyReductionFactor / (512); @@ -215,11 +220,11 @@ int TestFunc_Float2_Float(const Func *f, MTdata d) else float_ulps = f->float_ulps; - if (gTestFastRelaxed) - float_ulps = f->relaxed_error; + if (relaxedMode) float_ulps = f->relaxed_error; // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) return error; /* @@ -237,7 +242,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d) for( j = 0; j < bufferSize / sizeof( float ); j++ ) { p[j] = (uint32_t) i + j * scale; - if ( gTestFastRelaxed && strcmp(f->name,"sincos") == 0 ) + if (relaxedMode && strcmp(f->name, "sincos") == 0) { float pj = *(float *)&p[j]; if(fabs(pj) > M_PI) @@ -250,7 +255,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d) for( j = 0; j < bufferSize / sizeof( float ); j++ ) { p[j] = (uint32_t) i + j; - if ( gTestFastRelaxed && strcmp(f->name,"sincos") == 0 ) + if (relaxedMode && strcmp(f->name, "sincos") == 0) { float pj = *(float *)&p[j]; if(fabs(pj) > M_PI) @@ -329,7 +334,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d) double dd; feclearexcept(FE_OVERFLOW); - if( gTestFastRelaxed ) + if (relaxedMode) r[j] = (float) f->rfunc.f_fpf( s[j], &dd ); else r[j] = (float) f->func.f_fpf( s[j], &dd ); @@ -343,8 +348,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d) for( j = 0; j < bufferSize / sizeof( float ); j++ ) { double dd; - if( gTestFastRelaxed ) - r[j] = (float) f->rfunc.f_fpf( s[j], &dd ); + if (relaxedMode) + r[j] = (float)f->rfunc.f_fpf(s[j], &dd); else r[j] = (float) f->func.f_fpf( s[j], &dd ); @@ -395,13 +400,13 @@ int TestFunc_Float2_Float(const Func *f, MTdata d) float test = ((float*) q)[j]; float test2 = ((float*) q2)[j]; - if( gTestFastRelaxed ) - correct = f->rfunc.f_fpf( s[j], &correct2 ); + if (relaxedMode) + correct = f->rfunc.f_fpf(s[j], &correct2); else correct = f->func.f_fpf( s[j], &correct2 ); // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow - if (gTestFastRelaxed || skipNanInf) + if (relaxedMode || skipNanInf) { if (skipNanInf && overflow[j]) continue; @@ -415,7 +420,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d) typedef int (*CheckForSubnormal) (double,float); // If we are in fast relaxed math, we have a different calculation for the subnormal threshold. CheckForSubnormal isFloatResultSubnormalPtr; - if( gTestFastRelaxed ) + if (relaxedMode) { err = Abs_Error( test, correct); err2 = Abs_Error( test2, correct2); @@ -467,7 +472,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d) if( skipNanInf ) feclearexcept(FE_OVERFLOW); - if ( gTestFastRelaxed ) + if (relaxedMode) { correctp = f->rfunc.f_fpf( 0.0, &correct2p ); correctn = f->rfunc.f_fpf( -0.0, &correct2n ); @@ -492,7 +497,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d) continue; } - if ( gTestFastRelaxed ) + if (relaxedMode) { errp = Abs_Error( test, correctp ); err2p = Abs_Error( test, correct2p ); @@ -656,7 +661,7 @@ exit: return error; } -int TestFunc_Double2_Double(const Func *f, MTdata d) +int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -672,7 +677,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d) uint64_t step = bufferSize / sizeof( cl_double ); int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1); - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); if( gWimpyMode ) { step = (1ULL<<32) * gWimpyReductionFactor / (512); @@ -681,7 +686,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d) Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) diff --git a/test_conformance/math_brute_force/unary_two_results_i.cpp b/test_conformance/math_brute_force/unary_two_results_i.cpp index f5cc1e34..108be6a4 100644 --- a/test_conformance/math_brute_force/unary_two_results_i.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i.cpp @@ -19,17 +19,20 @@ #include #include "FunctionList.h" -int TestFunc_FloatI_Float(const Func *f, MTdata); -int TestFunc_DoubleI_Double(const Func *f, MTdata); +int TestFunc_FloatI_Float(const Func *f, MTdata, bool relaxedMode); +int TestFunc_DoubleI_Double(const Func *f, MTdata, bool relaxedMode); extern const vtbl _unary_two_results_i = { "unary_two_results_i", TestFunc_FloatI_Float, TestFunc_DoubleI_Double }; -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n" "{\n" @@ -89,11 +92,11 @@ static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_progr char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); - + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n" @@ -155,8 +158,7 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); - + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } typedef struct BuildKernelInfo @@ -165,6 +167,7 @@ typedef struct BuildKernelInfo cl_kernel *kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -172,7 +175,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -180,7 +184,8 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } cl_ulong abs_cl_long( cl_long i ); @@ -190,7 +195,7 @@ cl_ulong abs_cl_long( cl_long i ) return (i ^ mask) - mask; } -int TestFunc_FloatI_Float(const Func *f, MTdata d) +int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -208,7 +213,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d) int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1); cl_ulong maxiError; - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); if( gWimpyMode ) { step = (1ULL<<32) * gWimpyReductionFactor / (512); @@ -221,7 +226,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d) maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0; // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) return error; /* @@ -492,7 +498,7 @@ exit: return error; } -int TestFunc_DoubleI_Double(const Func *f, MTdata d) +int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -510,7 +516,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d) uint64_t step = bufferSize / sizeof( double ); int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1); - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); if( gWimpyMode ) { step = (1ULL<<32) * gWimpyReductionFactor / (512); @@ -519,7 +525,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d) Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) diff --git a/test_conformance/math_brute_force/unary_u.cpp b/test_conformance/math_brute_force/unary_u.cpp index 690b6e75..87fcae32 100644 --- a/test_conformance/math_brute_force/unary_u.cpp +++ b/test_conformance/math_brute_force/unary_u.cpp @@ -18,17 +18,20 @@ #include #include "FunctionList.h" -int TestFunc_Float_UInt(const Func *f, MTdata); -int TestFunc_Double_ULong(const Func *f, MTdata); +int TestFunc_Float_UInt(const Func *f, MTdata, bool relaxedMode); +int TestFunc_Double_ULong(const Func *f, MTdata, bool relaxedMode); extern const vtbl _unary_u = { "unary_u", TestFunc_Float_UInt, TestFunc_Double_ULong }; -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode); -static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global uint", sizeNames[vectorSize], "* in)\n" @@ -86,10 +89,11 @@ static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_progr char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, + cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", @@ -150,7 +154,7 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl char testName[32]; snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); - return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } typedef struct BuildKernelInfo @@ -159,6 +163,7 @@ typedef struct BuildKernelInfo cl_kernel *kernels; cl_program *programs; const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }BuildKernelInfo; static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -166,7 +171,8 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernel(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); @@ -174,10 +180,11 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo { BuildKernelInfo *info = (BuildKernelInfo*) p; cl_uint i = info->offset + job_id; - return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); + return BuildKernelDouble(info->nameInCode, i, info->kernels + i, + info->programs + i, info->relaxedMode); } -int TestFunc_Float_UInt(const Func *f, MTdata d) +int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -195,7 +202,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d) float float_ulps; float half_sin_cos_tan_limit = 0; - logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); if( gWimpyMode ) { step = (1ULL<<32) * gWimpyReductionFactor / (512); @@ -206,7 +213,8 @@ int TestFunc_Float_UInt(const Func *f, MTdata d) float_ulps = f->float_ulps; // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) return error; /* @@ -454,7 +462,7 @@ static cl_ulong random64( MTdata d ) return (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); } -int TestFunc_Double_ULong(const Func *f, MTdata d) +int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) { uint64_t i; uint32_t j, k; @@ -467,7 +475,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d) size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; uint64_t step = bufferSize / sizeof( cl_double ); - logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); if( gWimpyMode ) { step = (1ULL<<32) * gWimpyReductionFactor / (512); @@ -475,7 +483,8 @@ int TestFunc_Double_ULong(const Func *f, MTdata d) Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) @@ -689,4 +698,3 @@ exit: return error; } -