mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-25 16:29:03 +00:00
math_brute_force: Use clKernelWrapper in threaded tests (#1562)
Simplify code by relying on RAII to free resources. Reduce code duplication. This commit only affects tests that use `BuildKernelInfo`, which are the multi-threaded tests. Another patch will deal with the single-threaded tests, i.e., those using `BuildKernelInfo2`. Original patch by Marco Antognini. Signed-off-by: Marco Antognini <marco.antognini@arm.com> Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com> Signed-off-by: Marco Antognini <marco.antognini@arm.com> Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
This commit is contained in:
committed by
GitHub
parent
e9d2abf705
commit
6ecf824122
@@ -25,25 +25,16 @@ namespace {
|
|||||||
|
|
||||||
const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
|
const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source = GetBinaryKernel(kernel_name, name, ParameterType::Double,
|
|
||||||
ParameterType::Double, ParameterType::Double,
|
|
||||||
vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Double,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Double, ParameterType::Double,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -609,13 +600,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
test_info.skipNanInf = 0;
|
test_info.skipNanInf = 0;
|
||||||
test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
|
test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -708,14 +692,5 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,25 +25,16 @@ namespace {
|
|||||||
|
|
||||||
const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
|
const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source =
|
|
||||||
GetBinaryKernel(kernel_name, name, ParameterType::Float,
|
|
||||||
ParameterType::Float, ParameterType::Float, vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Float,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Float, ParameterType::Float,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -768,13 +759,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
|
test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
|
||||||
test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
|
test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -867,14 +851,5 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,25 +24,16 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source =
|
|
||||||
GetBinaryKernel(kernel_name, name, ParameterType::Double,
|
|
||||||
ParameterType::Double, ParameterType::Int, vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Double,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Double, ParameterType::Int,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -528,13 +519,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
test_info.ftz = f->ftz || gForceFTZ;
|
test_info.ftz = f->ftz || gForceFTZ;
|
||||||
test_info.relaxedMode = relaxedMode;
|
test_info.relaxedMode = relaxedMode;
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -630,14 +614,5 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,25 +24,16 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source =
|
|
||||||
GetBinaryKernel(kernel_name, name, ParameterType::Float,
|
|
||||||
ParameterType::Float, ParameterType::Int, vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Float,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Float, ParameterType::Int,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -523,13 +514,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||||
test_info.relaxedMode = relaxedMode;
|
test_info.relaxedMode = relaxedMode;
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -625,14 +609,5 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,25 +23,16 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source = GetBinaryKernel(kernel_name, name, ParameterType::Double,
|
|
||||||
ParameterType::Double, ParameterType::Double,
|
|
||||||
vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Double,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Double, ParameterType::Double,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -575,13 +566,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
|||||||
test_info.ulps = f->double_ulps;
|
test_info.ulps = f->double_ulps;
|
||||||
test_info.ftz = f->ftz || gForceFTZ;
|
test_info.ftz = f->ftz || gForceFTZ;
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -674,14 +658,5 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,25 +23,16 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source =
|
|
||||||
GetBinaryKernel(kernel_name, name, ParameterType::Float,
|
|
||||||
ParameterType::Float, ParameterType::Float, vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Float,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Float, ParameterType::Float,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -704,13 +695,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
|||||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||||
test_info.relaxedMode = relaxedMode;
|
test_info.relaxedMode = relaxedMode;
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -803,14 +787,5 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -85,6 +85,28 @@ void EmitEnableExtension(std::ostringstream &kernel, ParameterType type)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string GetBuildOptions(bool relaxed_mode)
|
||||||
|
{
|
||||||
|
std::ostringstream options;
|
||||||
|
|
||||||
|
if (gForceFTZ)
|
||||||
|
{
|
||||||
|
options << " -cl-denorms-are-zero";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
|
||||||
|
{
|
||||||
|
options << " -cl-fp32-correctly-rounded-divide-sqrt";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (relaxed_mode)
|
||||||
|
{
|
||||||
|
options << " -cl-fast-relaxed-math";
|
||||||
|
}
|
||||||
|
|
||||||
|
return options.str();
|
||||||
|
}
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
||||||
std::string GetKernelName(int vector_size_index)
|
std::string GetKernelName(int vector_size_index)
|
||||||
@@ -530,3 +552,50 @@ __kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
|
|||||||
|
|
||||||
return kernel.str();
|
return kernel.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
|
||||||
|
SourceGenerator generator)
|
||||||
|
{
|
||||||
|
// Generate the kernel code.
|
||||||
|
cl_uint vector_size_index = gMinVectorSizeIndex + job_id;
|
||||||
|
auto kernel_name = GetKernelName(vector_size_index);
|
||||||
|
auto source = generator(kernel_name, info.nameInCode, vector_size_index);
|
||||||
|
std::array<const char *, 1> sources{ source.c_str() };
|
||||||
|
|
||||||
|
// Create the program.
|
||||||
|
clProgramWrapper &program = info.programs[vector_size_index];
|
||||||
|
auto options = GetBuildOptions(info.relaxedMode);
|
||||||
|
int error =
|
||||||
|
create_single_kernel_helper(gContext, &program, nullptr, sources.size(),
|
||||||
|
sources.data(), nullptr, options.c_str());
|
||||||
|
if (error != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a kernel for each thread. cl_kernels aren't thread safe, so make
|
||||||
|
// one for every thread
|
||||||
|
auto &kernels = info.kernels[vector_size_index];
|
||||||
|
assert(kernels.empty() && "Dirty BuildKernelInfo");
|
||||||
|
kernels.resize(info.threadCount);
|
||||||
|
for (auto &kernel : kernels)
|
||||||
|
{
|
||||||
|
kernel = clCreateKernel(program, kernel_name.c_str(), &error);
|
||||||
|
if (!kernel || error != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
|
||||||
|
size_t log_size;
|
||||||
|
clGetProgramBuildInfo(program, gDevice, CL_PROGRAM_BUILD_LOG, 0,
|
||||||
|
nullptr, &log_size);
|
||||||
|
std::string buffer;
|
||||||
|
buffer.resize(log_size + 1);
|
||||||
|
clGetProgramBuildInfo(program, gDevice, CL_PROGRAM_BUILD_LOG,
|
||||||
|
log_size, &buffer[0], NULL);
|
||||||
|
vlog_error("Log: %s\n", buffer.c_str());
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|||||||
@@ -24,7 +24,8 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
// Array of thread-specific kernels for each vector size.
|
// Array of thread-specific kernels for each vector size.
|
||||||
using KernelMatrix = std::array<std::vector<cl_kernel>, VECTOR_SIZE_COUNT>;
|
using KernelMatrix =
|
||||||
|
std::array<std::vector<clKernelWrapper>, VECTOR_SIZE_COUNT>;
|
||||||
|
|
||||||
// Array of programs for each vector size.
|
// Array of programs for each vector size.
|
||||||
using Programs = std::array<clProgramWrapper, VECTOR_SIZE_COUNT>;
|
using Programs = std::array<clProgramWrapper, VECTOR_SIZE_COUNT>;
|
||||||
@@ -82,4 +83,12 @@ struct BuildKernelInfo
|
|||||||
bool relaxedMode;
|
bool relaxedMode;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
using SourceGenerator = std::string (*)(const std::string &kernel_name,
|
||||||
|
const char *builtin,
|
||||||
|
cl_uint vector_size_index);
|
||||||
|
|
||||||
|
/// Build kernels for all threads in "info" for the given job_id.
|
||||||
|
cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
|
||||||
|
SourceGenerator generator);
|
||||||
|
|
||||||
#endif /* COMMON_H */
|
#endif /* COMMON_H */
|
||||||
|
|||||||
@@ -24,25 +24,16 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source = GetBinaryKernel(kernel_name, name, ParameterType::Long,
|
|
||||||
ParameterType::Double, ParameterType::Double,
|
|
||||||
vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Long,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Double, ParameterType::Double,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -537,13 +528,6 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
test_info.ftz = f->ftz || gForceFTZ;
|
test_info.ftz = f->ftz || gForceFTZ;
|
||||||
test_info.relaxedMode = relaxedMode;
|
test_info.relaxedMode = relaxedMode;
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -623,14 +607,5 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,25 +23,16 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source =
|
|
||||||
GetBinaryKernel(kernel_name, name, ParameterType::Int,
|
|
||||||
ParameterType::Float, ParameterType::Float, vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Int,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Float, ParameterType::Float,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -525,13 +516,6 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||||
test_info.relaxedMode = relaxedMode;
|
test_info.relaxedMode = relaxedMode;
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -611,14 +595,5 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,24 +24,15 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Long,
|
|
||||||
ParameterType::Double, vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetUnaryKernel(kernel_name, builtin, ParameterType::Long,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Double, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -326,13 +317,6 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
test_info.ftz = f->ftz || gForceFTZ;
|
test_info.ftz = f->ftz || gForceFTZ;
|
||||||
test_info.relaxedMode = relaxedMode;
|
test_info.relaxedMode = relaxedMode;
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -400,14 +384,5 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,24 +23,15 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Int,
|
|
||||||
ParameterType::Float, vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Float, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -338,13 +329,6 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||||
test_info.relaxedMode = relaxedMode;
|
test_info.relaxedMode = relaxedMode;
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -412,14 +396,5 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1050,54 +1050,6 @@ int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
int MakeKernels(const char **c, cl_uint count, const char *name,
|
|
||||||
cl_uint kernel_count, cl_kernel *k, cl_program *p,
|
|
||||||
bool relaxedMode)
|
|
||||||
{
|
|
||||||
char options[200] = "";
|
|
||||||
|
|
||||||
if (gForceFTZ)
|
|
||||||
{
|
|
||||||
strcat(options, " -cl-denorms-are-zero ");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
|
|
||||||
{
|
|
||||||
strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt ");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (relaxedMode)
|
|
||||||
{
|
|
||||||
strcat(options, " -cl-fast-relaxed-math");
|
|
||||||
}
|
|
||||||
|
|
||||||
int error =
|
|
||||||
create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
|
|
||||||
if (error != CL_SUCCESS)
|
|
||||||
{
|
|
||||||
vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (cl_uint i = 0; i < kernel_count; i++)
|
|
||||||
{
|
|
||||||
k[i] = clCreateKernel(*p, name, &error);
|
|
||||||
if (NULL == k[i] || error)
|
|
||||||
{
|
|
||||||
char buffer[2048] = "";
|
|
||||||
|
|
||||||
vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
|
|
||||||
clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG,
|
|
||||||
sizeof(buffer), buffer, NULL);
|
|
||||||
vlog_error("Log: %s\n", buffer);
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static int IsInRTZMode(void)
|
static int IsInRTZMode(void)
|
||||||
{
|
{
|
||||||
int error;
|
int error;
|
||||||
|
|||||||
@@ -24,24 +24,15 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Double,
|
|
||||||
ParameterType::Double, vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Double, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -351,13 +342,6 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
test_info.ftz = f->ftz || gForceFTZ;
|
test_info.ftz = f->ftz || gForceFTZ;
|
||||||
test_info.relaxedMode = relaxedMode;
|
test_info.relaxedMode = relaxedMode;
|
||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -437,14 +421,5 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,24 +23,15 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
|
||||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
|
||||||
{
|
|
||||||
auto kernel_name = GetKernelName(vectorSize);
|
|
||||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Float,
|
|
||||||
ParameterType::Float, vectorSize);
|
|
||||||
std::array<const char *, 1> sources{ source.c_str() };
|
|
||||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
|
||||||
kernel_count, k, p, relaxedMode);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
|
cl_uint vector_size_index) {
|
||||||
info->kernels[vectorSize].data(),
|
return GetUnaryKernel(kernel_name, builtin, ParameterType::Float,
|
||||||
&(info->programs[vectorSize]), info->relaxedMode);
|
ParameterType::Float, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread specific data for a worker thread
|
// Thread specific data for a worker thread
|
||||||
@@ -505,13 +496,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
test_info.ftz =
|
test_info.ftz =
|
||||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||||
test_info.relaxedMode = relaxedMode;
|
test_info.relaxedMode = relaxedMode;
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
|
||||||
// every thread
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
test_info.k[i].resize(test_info.threadCount, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
test_info.tinfo.resize(test_info.threadCount);
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -615,14 +599,5 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
|
||||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
||||||
{
|
|
||||||
for (auto &kernel : test_info.k[i])
|
|
||||||
{
|
|
||||||
clReleaseKernel(kernel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -85,9 +85,6 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference);
|
|||||||
|
|
||||||
int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
|
int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
|
||||||
cl_program *p, bool relaxedMode);
|
cl_program *p, bool relaxedMode);
|
||||||
int MakeKernels(const char **c, cl_uint count, const char *name,
|
|
||||||
cl_uint kernel_count, cl_kernel *k, cl_program *p,
|
|
||||||
bool relaxedMode);
|
|
||||||
|
|
||||||
// used to convert a bucket of bits into a search pattern through double
|
// used to convert a bucket of bits into a search pattern through double
|
||||||
inline double DoubleFromUInt32(uint32_t bits)
|
inline double DoubleFromUInt32(uint32_t bits)
|
||||||
|
|||||||
Reference in New Issue
Block a user