From ba9312e4a2e2431a716150a3df3491834076d046 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Fri, 21 May 2021 10:07:54 +0100 Subject: [PATCH] Fix ODR violations in math_brute_force (#1255) A program having a type (such as ThreadInfo) defined differently in multiple translation units exhibits undefined behaviour. This commit fixes such issues in the math_brute_force component by ensuring most types are local to their translation unit with the help of anonymous namespaces. Later refactoring will be able to extract common definitions to a single place. This patch also removes unnecessary static and typedef keywords. Otherwise, code is only moved around with no change. Signed-off-by: Marco Antognini --- .../math_brute_force/binary_double.cpp | 396 ++++++++--------- .../math_brute_force/binary_float.cpp | 398 ++++++++--------- .../math_brute_force/binary_i_double.cpp | 399 ++++++++--------- .../math_brute_force/binary_i_float.cpp | 401 ++++++++--------- .../binary_operator_double.cpp | 392 ++++++++--------- .../binary_operator_float.cpp | 396 ++++++++--------- .../binary_two_results_i_double.cpp | 20 +- .../binary_two_results_i_float.cpp | 20 +- .../math_brute_force/function_list.cpp | 1 + .../math_brute_force/i_unary_double.cpp | 14 +- .../math_brute_force/i_unary_float.cpp | 14 +- .../math_brute_force/macro_binary_double.cpp | 355 +++++++-------- .../math_brute_force/macro_binary_float.cpp | 357 ++++++++-------- .../math_brute_force/macro_unary_double.cpp | 323 +++++++------- .../math_brute_force/macro_unary_float.cpp | 325 +++++++------- .../math_brute_force/mad_double.cpp | 14 +- .../math_brute_force/mad_float.cpp | 14 +- .../math_brute_force/ternary_double.cpp | 18 +- .../math_brute_force/ternary_float.cpp | 18 +- .../math_brute_force/unary_double.cpp | 352 +++++++-------- .../math_brute_force/unary_float.cpp | 404 +++++++++--------- .../unary_two_results_double.cpp | 14 +- .../unary_two_results_float.cpp | 14 +- .../unary_two_results_i_double.cpp | 16 +- .../unary_two_results_i_float.cpp | 16 +- .../math_brute_force/unary_u_double.cpp | 16 +- .../math_brute_force/unary_u_float.cpp | 14 +- 27 files changed, 2400 insertions(+), 2321 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 4baa4991..9c6b59b4 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -20,10 +20,12 @@ #include +namespace { + const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -109,7 +111,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -117,9 +119,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -128,7 +130,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -140,9 +142,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -164,10 +166,10 @@ typedef struct TestInfo int isNextafter; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. -} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -277,195 +279,10 @@ static const double specialValues[] = { +0.0, }; -static size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); - test_info.skipNanInf = 0; - test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -825,3 +642,188 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); + test_info.skipNanInf = 0; + test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 32caafa3..9c7081dc 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -20,10 +20,12 @@ #include +namespace { + const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -107,7 +109,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -115,9 +117,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -126,7 +128,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -138,9 +140,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -162,10 +164,10 @@ typedef struct TestInfo int isNextafter; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -267,196 +269,10 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); - test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; - test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -986,3 +802,189 @@ exit: if (overflow) free(overflow); return error; } + +} // anonymous namespace + +int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); + test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; + test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 69e620aa..2fcc8c10 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -108,7 +110,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -116,9 +118,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -127,7 +129,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -139,9 +141,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -159,10 +161,10 @@ typedef struct TestInfo int ftz; // non-zero if running in flush to zero mode // no special values -} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -272,201 +274,18 @@ static const double specialValues[] = { +0.0, }; -static size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static const int specialValuesInt[] = { +const int specialValuesInt[] = { 0, 1, 2, 3, 1022, 1023, 1024, INT_MIN, INT_MAX, -1, -2, -3, -1022, -1023, -11024, -INT_MAX, }; -static constexpr size_t specialValuesIntCount = + +constexpr size_t specialValuesIntCount = sizeof(specialValuesInt) / sizeof(specialValuesInt[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - cl_int maxErrorVal2 = 0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - cl_buffer_region region2 = { i * test_info.subBufferSize - * sizeof(cl_int), - test_info.subBufferSize * sizeof(cl_int) }; - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -744,3 +563,187 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + cl_int maxErrorVal2 = 0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + cl_buffer_region region2 = { i * test_info.subBufferSize + * sizeof(cl_int), + test_info.subBufferSize * sizeof(cl_int) }; + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index e65a9aaf..e1538e3c 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -106,7 +108,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -114,9 +116,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -125,7 +127,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -137,9 +139,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -157,10 +159,10 @@ typedef struct TestInfo int ftz; // non-zero if running in flush to zero mode // no special values -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -262,204 +264,20 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static const int specialValuesInt[] = { +const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, 128, 0x02000001, 0x04000001, 1465264071, 1488522147, -1, -2, -3, -126, -127, -128, -0x02000001, -0x04000001, -1465264071, -1488522147, }; -static size_t specialValuesIntCount = + +constexpr size_t specialValuesIntCount = sizeof(specialValuesInt) / sizeof(specialValuesInt[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - cl_int maxErrorVal2 = 0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - cl_buffer_region region2 = { i * test_info.subBufferSize - * sizeof(cl_int), - test_info.subBufferSize * sizeof(cl_int) }; - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -738,3 +556,188 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + cl_int maxErrorVal2 = 0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + cl_buffer_region region2 = { i * test_info.subBufferSize + * sizeof(cl_int), + test_info.subBufferSize * sizeof(cl_int) }; + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 21e76c85..605a3144 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -20,9 +20,11 @@ #include -static int BuildKernel(const char *operator_symbol, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) +namespace { + +int BuildKernel(const char *operator_symbol, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -108,7 +110,7 @@ static int BuildKernel(const char *operator_symbol, int vectorSize, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -116,9 +118,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -127,7 +129,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -139,9 +141,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -161,10 +163,10 @@ typedef struct TestInfo // otherwise. // no special fields -} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -274,192 +276,10 @@ static const double specialValues[] = { +0.0, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, - bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -793,3 +613,185 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, + bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index ccaef604..8448af54 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -20,9 +20,11 @@ #include -static int BuildKernel(const char *operator_symbol, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) +namespace { + +int BuildKernel(const char *operator_symbol, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -106,7 +108,7 @@ static int BuildKernel(const char *operator_symbol, int vectorSize, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -114,9 +116,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -125,7 +127,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -137,9 +139,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -159,10 +161,10 @@ typedef struct TestInfo // otherwise. // no special fields -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -264,194 +266,10 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, - bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -923,3 +741,187 @@ exit: if (overflow) free(overflow); return error; } + +} // anonymous namespace + +int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, + bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 14f41092..43dc1d30 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -115,16 +117,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -132,7 +134,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -typedef struct ComputeReferenceInfoD_ +struct ComputeReferenceInfoD { const double *x; const double *y; @@ -141,9 +143,9 @@ typedef struct ComputeReferenceInfoD_ long double (*f_ffpI)(long double, long double, int *); cl_uint lim; cl_uint count; -} ComputeReferenceInfoD; +}; -static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) +cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) { ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo; cl_uint lim = cri->lim; @@ -165,6 +167,8 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) return CL_SUCCESS; } +} // anonymous namespace + int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 5ef44b6e..83ceeaab 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -113,16 +115,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -130,7 +132,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -typedef struct ComputeReferenceInfoF_ +struct ComputeReferenceInfoF { const float *x; const float *y; @@ -139,9 +141,9 @@ typedef struct ComputeReferenceInfoF_ double (*f_ffpI)(double, double, int *); cl_uint lim; cl_uint count; -} ComputeReferenceInfoF; +}; -static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) +cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) { ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo; cl_uint lim = cri->lim; @@ -161,6 +163,8 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) return CL_SUCCESS; } +} // anonymous namespace + int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp index 3edbb485..91736285 100644 --- a/test_conformance/math_brute_force/function_list.cpp +++ b/test_conformance/math_brute_force/function_list.cpp @@ -53,6 +53,7 @@ STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \ _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \ } + #define unaryF NULL #define i_unaryF NULL #define unaryF_u NULL diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index 4383fa8b..d09e14c1 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -100,16 +102,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -117,6 +119,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index c803aa32..89b566d9 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -98,16 +100,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -115,6 +117,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index d09915f6..11281261 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -107,7 +109,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -115,9 +117,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -126,16 +128,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -150,11 +152,10 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode - -} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -264,174 +265,10 @@ static const double specialValues[] = { +0.0, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (size_t i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -735,3 +572,167 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (size_t i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index c530cdaf..6475e4bb 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -105,7 +107,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -113,9 +115,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -124,16 +126,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -148,11 +150,10 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode - -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -254,175 +255,10 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -724,3 +560,168 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 00e65a2c..860e4596 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -101,7 +103,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -109,9 +111,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -120,14 +122,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -142,160 +144,9 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode +}; -} TestInfo; - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -506,3 +357,153 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) return CL_SUCCESS; } + +} // anonymous namespace + +int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 3c1717ac..58a2a954 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -100,7 +102,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -108,9 +110,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -119,14 +121,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -141,161 +143,9 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode +}; -} TestInfo; - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -521,3 +371,154 @@ exit: return ret; } + +} // anonymous namespace + +int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index a32cd5a8..8e88f9f6 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -113,16 +115,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -130,6 +132,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 095a22ff..0552ba4b 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -111,16 +113,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -128,6 +130,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index 606fdc5a..8af136ac 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -23,8 +23,10 @@ #define CORRECTLY_ROUNDED 0 #define FLUSHED 1 -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -116,16 +118,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -134,7 +136,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -202,9 +204,11 @@ static const double specialValues[] = { +0.0, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); +} // anonymous namespace + int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) { diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index e52c0a0f..c69083ad 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -23,8 +23,10 @@ #define CORRECTLY_ROUNDED 0 #define FLUSHED 1 -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -114,16 +116,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -132,7 +134,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -210,9 +212,11 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); +} // anonymous namespace + int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index f6fa3264..dcd21884 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -101,7 +103,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -109,9 +111,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -120,16 +122,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -151,174 +153,9 @@ typedef struct TestInfo float half_sin_cos_tan_limit; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. -} TestInfo; +}; -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - test_info.relaxedMode = relaxedMode; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ %a", maxError, maxErrorVal); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -547,3 +384,168 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) return CL_SUCCESS; } + +} // anonymous namespace + +int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ %a", maxError, maxErrorVal); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 17edc58d..f176fb95 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -99,7 +101,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -107,9 +109,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -118,16 +120,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -149,200 +151,9 @@ typedef struct TestInfo float half_sin_cos_tan_limit; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. -} TestInfo; +}; -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0); - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - } - - // Check for special cases for unary float - test_info.isRangeLimited = 0; - test_info.half_sin_cos_tan_limit = 0; - if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos")) - { - test_info.isRangeLimited = 1; - test_info.half_sin_cos_tan_limit = 1.0f - + test_info.ulps - * (FLT_EPSILON / 2.0f); // out of range results from finite - // inputs must be in [-1,1] - } - else if (0 == strcmp(f->name, "half_tan")) - { - test_info.isRangeLimited = 1; - test_info.half_sin_cos_tan_limit = - INFINITY; // out of range resut from finite inputs must be numeric - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting || skipTestingRelaxed) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - if (skipTestingRelaxed) - { - vlog(" (rlx skip correctness testing)\n"); - goto exit; - } - - vlog("\t%8.2f @ %a", maxError, maxErrorVal); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -725,3 +536,194 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) return CL_SUCCESS; } + +} // anonymous namespace + +int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0); + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Check for special cases for unary float + test_info.isRangeLimited = 0; + test_info.half_sin_cos_tan_limit = 0; + if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos")) + { + test_info.isRangeLimited = 1; + test_info.half_sin_cos_tan_limit = 1.0f + + test_info.ulps + * (FLT_EPSILON / 2.0f); // out of range results from finite + // inputs must be in [-1,1] + } + else if (0 == strcmp(f->name, "half_tan")) + { + test_info.isRangeLimited = 1; + test_info.half_sin_cos_tan_limit = + INFINITY; // out of range resut from finite inputs must be numeric + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting || skipTestingRelaxed) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + if (skipTestingRelaxed) + { + vlog(" (rlx skip correctness testing)\n"); + goto exit; + } + + vlog("\t%8.2f @ %a", maxError, maxErrorVal); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 71dd4f44..8757fbc4 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -107,16 +109,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -124,6 +126,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index 4a375ce3..a54bd024 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -105,16 +107,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -122,6 +124,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index 14d1fb99..9ed77dce 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -108,16 +110,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -125,12 +127,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -static cl_ulong abs_cl_long(cl_long i) +cl_ulong abs_cl_long(cl_long i) { cl_long mask = i >> 63; return (i ^ mask) - mask; } +} // anonymous namespace + int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index 23b0d707..d048220b 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -106,16 +108,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -123,12 +125,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -static cl_ulong abs_cl_long(cl_long i) +cl_ulong abs_cl_long(cl_long i) { cl_long mask = i >> 63; return (i ^ mask) - mask; } +} // anonymous namespace + int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index 3c5f99da..9478d0bc 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -102,16 +104,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -119,11 +121,13 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -static cl_ulong random64(MTdata d) +cl_ulong random64(MTdata d) { return (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32); } +} // anonymous namespace + int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index 44c5af47..848a9bac 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -99,16 +101,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -116,6 +118,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) { int error;