diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index ff04d836..8ce9f67d 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -25,25 +25,16 @@ namespace { const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = GetBinaryKernel(kernel_name, name, ParameterType::Double, - ParameterType::Double, ParameterType::Double, - vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetBinaryKernel(kernel_name, builtin, ParameterType::Double, + ParameterType::Double, ParameterType::Double, + vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -609,13 +600,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.skipNanInf = 0; test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -708,14 +692,5 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 95cb8e67..ace1fadf 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -25,25 +25,16 @@ namespace { const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = - GetBinaryKernel(kernel_name, name, ParameterType::Float, - ParameterType::Float, ParameterType::Float, vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetBinaryKernel(kernel_name, builtin, ParameterType::Float, + ParameterType::Float, ParameterType::Float, + vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -768,13 +759,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -867,14 +851,5 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 75e32974..2e08996a 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -24,25 +24,16 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = - GetBinaryKernel(kernel_name, name, ParameterType::Double, - ParameterType::Double, ParameterType::Int, vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetBinaryKernel(kernel_name, builtin, ParameterType::Double, + ParameterType::Double, ParameterType::Int, + vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -528,13 +519,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) test_info.ftz = f->ftz || gForceFTZ; test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -630,14 +614,5 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index b11ac48f..174f58c7 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -24,25 +24,16 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = - GetBinaryKernel(kernel_name, name, ParameterType::Float, - ParameterType::Float, ParameterType::Int, vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetBinaryKernel(kernel_name, builtin, ParameterType::Float, + ParameterType::Float, ParameterType::Int, + vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -523,13 +514,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -625,14 +609,5 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 9852f005..a96f8f6a 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -23,25 +23,16 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = GetBinaryKernel(kernel_name, name, ParameterType::Double, - ParameterType::Double, ParameterType::Double, - vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetBinaryKernel(kernel_name, builtin, ParameterType::Double, + ParameterType::Double, ParameterType::Double, + vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -575,13 +566,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -674,14 +658,5 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index dbdc6f12..68497de5 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -23,25 +23,16 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = - GetBinaryKernel(kernel_name, name, ParameterType::Float, - ParameterType::Float, ParameterType::Float, vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetBinaryKernel(kernel_name, builtin, ParameterType::Float, + ParameterType::Float, ParameterType::Float, + vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -704,13 +695,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -803,14 +787,5 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp index 42069962..71b4defe 100644 --- a/test_conformance/math_brute_force/common.cpp +++ b/test_conformance/math_brute_force/common.cpp @@ -85,6 +85,28 @@ void EmitEnableExtension(std::ostringstream &kernel, ParameterType type) } } +std::string GetBuildOptions(bool relaxed_mode) +{ + std::ostringstream options; + + if (gForceFTZ) + { + options << " -cl-denorms-are-zero"; + } + + if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) + { + options << " -cl-fp32-correctly-rounded-divide-sqrt"; + } + + if (relaxed_mode) + { + options << " -cl-fast-relaxed-math"; + } + + return options.str(); +} + } // anonymous namespace std::string GetKernelName(int vector_size_index) @@ -530,3 +552,50 @@ __kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out, return kernel.str(); } + +cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id, + SourceGenerator generator) +{ + // Generate the kernel code. + cl_uint vector_size_index = gMinVectorSizeIndex + job_id; + auto kernel_name = GetKernelName(vector_size_index); + auto source = generator(kernel_name, info.nameInCode, vector_size_index); + std::array sources{ source.c_str() }; + + // Create the program. + clProgramWrapper &program = info.programs[vector_size_index]; + auto options = GetBuildOptions(info.relaxedMode); + int error = + create_single_kernel_helper(gContext, &program, nullptr, sources.size(), + sources.data(), nullptr, options.c_str()); + if (error != CL_SUCCESS) + { + vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error); + return error; + } + + // Create a kernel for each thread. cl_kernels aren't thread safe, so make + // one for every thread + auto &kernels = info.kernels[vector_size_index]; + assert(kernels.empty() && "Dirty BuildKernelInfo"); + kernels.resize(info.threadCount); + for (auto &kernel : kernels) + { + kernel = clCreateKernel(program, kernel_name.c_str(), &error); + if (!kernel || error != CL_SUCCESS) + { + vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error); + size_t log_size; + clGetProgramBuildInfo(program, gDevice, CL_PROGRAM_BUILD_LOG, 0, + nullptr, &log_size); + std::string buffer; + buffer.resize(log_size + 1); + clGetProgramBuildInfo(program, gDevice, CL_PROGRAM_BUILD_LOG, + log_size, &buffer[0], NULL); + vlog_error("Log: %s\n", buffer.c_str()); + return error; + } + } + + return CL_SUCCESS; +} diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 027a4da8..481b3b2a 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -24,7 +24,8 @@ #include // Array of thread-specific kernels for each vector size. -using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; +using KernelMatrix = + std::array, VECTOR_SIZE_COUNT>; // Array of programs for each vector size. using Programs = std::array; @@ -82,4 +83,12 @@ struct BuildKernelInfo bool relaxedMode; }; +using SourceGenerator = std::string (*)(const std::string &kernel_name, + const char *builtin, + cl_uint vector_size_index); + +/// Build kernels for all threads in "info" for the given job_id. +cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id, + SourceGenerator generator); + #endif /* COMMON_H */ diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 003c2c3b..14a6b732 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -24,25 +24,16 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = GetBinaryKernel(kernel_name, name, ParameterType::Long, - ParameterType::Double, ParameterType::Double, - vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetBinaryKernel(kernel_name, builtin, ParameterType::Long, + ParameterType::Double, ParameterType::Double, + vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -537,13 +528,6 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.ftz = f->ftz || gForceFTZ; test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -623,14 +607,5 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 4963db20..12eff3b9 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -23,25 +23,16 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = - GetBinaryKernel(kernel_name, name, ParameterType::Int, - ParameterType::Float, ParameterType::Float, vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetBinaryKernel(kernel_name, builtin, ParameterType::Int, + ParameterType::Float, ParameterType::Float, + vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -525,13 +516,6 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -611,14 +595,5 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index a486f70c..10de39dd 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -24,24 +24,15 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = GetUnaryKernel(kernel_name, name, ParameterType::Long, - ParameterType::Double, vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetUnaryKernel(kernel_name, builtin, ParameterType::Long, + ParameterType::Double, vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -326,13 +317,6 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) test_info.ftz = f->ftz || gForceFTZ; test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -400,14 +384,5 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 832dcb6d..c1335cce 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -23,24 +23,15 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = GetUnaryKernel(kernel_name, name, ParameterType::Int, - ParameterType::Float, vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetUnaryKernel(kernel_name, builtin, ParameterType::Int, + ParameterType::Float, vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -338,13 +329,6 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -412,14 +396,5 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 64491bd4..ab75efd5 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -1050,54 +1050,6 @@ int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k, return error; } -int MakeKernels(const char **c, cl_uint count, const char *name, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) -{ - char options[200] = ""; - - if (gForceFTZ) - { - strcat(options, " -cl-denorms-are-zero "); - } - - if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) - { - strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt "); - } - - if (relaxedMode) - { - strcat(options, " -cl-fast-relaxed-math"); - } - - int error = - create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options); - if (error != CL_SUCCESS) - { - vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error); - return error; - } - - for (cl_uint i = 0; i < kernel_count; i++) - { - k[i] = clCreateKernel(*p, name, &error); - if (NULL == k[i] || error) - { - char buffer[2048] = ""; - - vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error); - clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, - sizeof(buffer), buffer, NULL); - vlog_error("Log: %s\n", buffer); - return error; - } - } - - return error; -} - - static int IsInRTZMode(void) { int error; diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 2a03d6e6..154cda1b 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -24,24 +24,15 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = GetUnaryKernel(kernel_name, name, ParameterType::Double, - ParameterType::Double, vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetUnaryKernel(kernel_name, builtin, ParameterType::Double, + ParameterType::Double, vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -351,13 +342,6 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.ftz = f->ftz || gForceFTZ; test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -437,14 +421,5 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index c774d6b3..740cddad 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -23,24 +23,15 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - auto kernel_name = GetKernelName(vectorSize); - auto source = GetUnaryKernel(kernel_name, name, ParameterType::Float, - ParameterType::Float, vectorSize); - std::array sources{ source.c_str() }; - return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(), - kernel_count, k, p, relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetUnaryKernel(kernel_name, builtin, ParameterType::Float, + ParameterType::Float, vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -505,13 +496,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -615,14 +599,5 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - return error; } diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h index b4a59edb..2072c56b 100644 --- a/test_conformance/math_brute_force/utility.h +++ b/test_conformance/math_brute_force/utility.h @@ -85,9 +85,6 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference); int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p, bool relaxedMode); -int MakeKernels(const char **c, cl_uint count, const char *name, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode); // used to convert a bucket of bits into a search pattern through double inline double DoubleFromUInt32(uint32_t bits)