math_brute_force: Drop BuildKernelInfo2 (#1634)

Replace occurrences of `BuildKernelInfo2` with `BuildKernelInfo`.
This aligns the kernel creation code of the non-threaded tests to
the kernel creation code of the threaded tests.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
This commit is contained in:
Sven van Haastregt
2023-03-20 09:44:25 +00:00
committed by paulfradgley
parent 8d866579e9
commit 8e74df8fd4
14 changed files with 301 additions and 546 deletions

View File

@@ -25,37 +25,19 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetBinaryKernel(kernel_name, name, ParameterType::Double,
ParameterType::Int, ParameterType::Double,
ParameterType::Double, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
struct BuildKernelInfo2
{
Kernels &kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetBinaryKernel(kernel_name, builtin, ParameterType::Double,
ParameterType::Int, ParameterType::Double,
ParameterType::Double, vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
struct ComputeReferenceInfoD
{
const double *x;
@@ -95,7 +77,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError = 0.0f;
int64_t maxError2 = 0;
int ftz = f->ftz || gForceFTZ;
@@ -112,8 +95,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
int testingRemquo = !strcmp(f->name, "remquo");
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -200,34 +183,35 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
&gOutBuffer2[j])))
if ((error =
clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
&gInBuffer2)))
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
sizeof(gInBuffer2), &gInBuffer2)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -25,35 +25,16 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetBinaryKernel(kernel_name, name, ParameterType::Float,
ParameterType::Int, ParameterType::Float,
ParameterType::Float, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
struct BuildKernelInfo2
{
Kernels &kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetBinaryKernel(kernel_name, builtin, ParameterType::Float,
ParameterType::Int, ParameterType::Float,
ParameterType::Float, vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
struct ComputeReferenceInfoF
@@ -96,7 +77,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
int64_t maxError2 = 0;
@@ -115,8 +97,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
int testingRemquo = !strcmp(f->name, "remquo");
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -203,34 +185,35 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
&gOutBuffer2[j])))
if ((error =
clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
&gInBuffer2)))
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
sizeof(gInBuffer2), &gInBuffer2)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -24,34 +24,15 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Int,
ParameterType::Double, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
struct BuildKernelInfo2
{
Kernels &kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
ParameterType::Double, vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
} // anonymous namespace
@@ -60,7 +41,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
int ftz = f->ftz || gForceFTZ;
uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
int scale =
@@ -77,8 +59,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -143,22 +125,22 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
goto exit;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
goto exit;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
goto exit;

View File

@@ -24,34 +24,15 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Int,
ParameterType::Float, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
struct BuildKernelInfo2
{
Kernels &kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
ParameterType::Float, vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
} // anonymous namespace
@@ -60,7 +41,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
@@ -76,8 +58,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -142,22 +124,22 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
goto exit;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
goto exit;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
goto exit;

View File

@@ -23,35 +23,16 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
ParameterType::Double, ParameterType::Double,
ParameterType::Double, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
struct BuildKernelInfo2
{
Kernels &kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetTernaryKernel(kernel_name, builtin, ParameterType::Double,
ParameterType::Double, ParameterType::Double,
ParameterType::Double, vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
} // anonymous namespace
@@ -60,7 +41,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError = 0.0f;
double maxErrorVal = 0.0f;
double maxErrorVal2 = 0.0f;
@@ -70,8 +52,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -147,34 +129,34 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
&gInBuffer2)))
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
sizeof(gInBuffer2), &gInBuffer2)))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
&gInBuffer3)))
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
sizeof(gInBuffer3), &gInBuffer3)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -23,35 +23,16 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
ParameterType::Float, ParameterType::Float,
ParameterType::Float, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
struct BuildKernelInfo2
{
Kernels &kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetTernaryKernel(kernel_name, builtin, ParameterType::Float,
ParameterType::Float, ParameterType::Float,
ParameterType::Float, vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
} // anonymous namespace
@@ -63,7 +44,8 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError = 0.0f;
float maxErrorVal = 0.0f;
float maxErrorVal2 = 0.0f;
@@ -71,8 +53,8 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -148,34 +130,34 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
&gInBuffer2)))
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
sizeof(gInBuffer2), &gInBuffer2)))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
&gInBuffer3)))
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
sizeof(gInBuffer3), &gInBuffer3)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -27,35 +27,16 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
ParameterType::Double, ParameterType::Double,
ParameterType::Double, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
struct BuildKernelInfo2
{
Kernels &kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetTernaryKernel(kernel_name, builtin, ParameterType::Double,
ParameterType::Double, ParameterType::Double,
ParameterType::Double, vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
// A table of more difficult cases to get right
@@ -137,7 +118,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
{
int error;
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ;
double maxErrorVal = 0.0f;
@@ -150,8 +132,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
Force64BitFPUPrecision();
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -252,34 +234,34 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
&gInBuffer2)))
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
sizeof(gInBuffer2), &gInBuffer2)))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
&gInBuffer3)))
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
sizeof(gInBuffer3), &gInBuffer3)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -27,35 +27,16 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
ParameterType::Float, ParameterType::Float,
ParameterType::Float, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
struct BuildKernelInfo2
{
Kernels &kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetTernaryKernel(kernel_name, builtin, ParameterType::Float,
ParameterType::Float, ParameterType::Float,
ParameterType::Float, vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
// A table of more difficult cases to get right
@@ -149,7 +130,8 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
float maxErrorVal = 0.0f;
@@ -168,8 +150,8 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -274,34 +256,34 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
&gInBuffer2)))
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
sizeof(gInBuffer2), &gInBuffer2)))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
&gInBuffer3)))
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
sizeof(gInBuffer3), &gInBuffer3)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -24,35 +24,16 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Double,
ParameterType::Double, ParameterType::Double,
vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
struct BuildKernelInfo2
{
Kernels &kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
ParameterType::Double, ParameterType::Double,
vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
} // anonymous namespace
@@ -61,7 +42,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError0 = 0.0f;
float maxError1 = 0.0f;
int ftz = f->ftz || gForceFTZ;
@@ -76,8 +58,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
Force64BitFPUPrecision();
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -159,28 +141,29 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
{
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
&gOutBuffer2[j])))
if ((error =
clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -24,35 +24,16 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source =
GetUnaryKernel(kernel_name, name, ParameterType::Float,
ParameterType::Float, ParameterType::Float, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
struct BuildKernelInfo2
{
Kernels &kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
// Job callback handed to ThreadPool_Do: builds the kernel(s) for the vector
// size selected by `job_id`. `p` points at the caller's build-info struct;
// `thread_id` is unused.
// NOTE(review): this listing interleaves two bodies (diff markers were lost).
// The first three statements use BuildKernelInfo2/BuildKernel; everything from
// the `BuildKernelInfo &info` declaration onwards uses BuildKernels. As written
// the second body is unreachable and redeclares `info` -- keep only one.
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
// Jobs are numbered from 0, so offset by the first vector size index in use.
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
// Source generator callback: produces the kernel source for one builtin at one
// vector size index via GetUnaryKernel (Float, Float, Float parameter types).
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetUnaryKernel(kernel_name, builtin, ParameterType::Float,
ParameterType::Float, ParameterType::Float,
vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
} // anonymous namespace
@@ -61,7 +42,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError0 = 0.0f;
float maxError1 = 0.0f;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -77,8 +59,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
float float_ulps = getAllowedUlpError(f, relaxedMode);
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -175,28 +157,29 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
{
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
&gOutBuffer2[j])))
if ((error =
clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -25,35 +25,16 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source =
GetUnaryKernel(kernel_name, name, ParameterType::Double,
ParameterType::Int, ParameterType::Double, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
// One kernel wrapper per vector size index (VECTOR_SIZE_COUNT entries).
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
// Bundle of arguments that BuildKernelFn receives through its void* parameter.
// NOTE(review): in this listing the struct appears alongside BuildKernelInfo,
// which looks like its replacement -- confirm which one the file keeps.
struct BuildKernelInfo2
{
// Indexed by vector size index; BuildKernelFn passes the address of one entry
// to BuildKernel.
Kernels &kernels;
// Indexed the same way as `kernels`.
Programs &programs;
// Name of the builtin under test, forwarded to the kernel source generator.
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
// Job callback handed to ThreadPool_Do: builds the kernel(s) for the vector
// size selected by `job_id`. `p` points at the caller's build-info struct;
// `thread_id` is unused.
// NOTE(review): this listing interleaves two bodies (diff markers were lost).
// The first three statements use BuildKernelInfo2/BuildKernel; everything from
// the `BuildKernelInfo &info` declaration onwards uses BuildKernels. As written
// the second body is unreachable and redeclares `info` -- keep only one.
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
// Jobs are numbered from 0, so offset by the first vector size index in use.
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
// Source generator callback: produces the kernel source for one builtin at one
// vector size index via GetUnaryKernel (Double, Int, Double parameter types).
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
ParameterType::Int, ParameterType::Double,
vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
cl_ulong abs_cl_long(cl_long i)
@@ -68,7 +49,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError = 0.0f;
int64_t maxError2 = 0;
int ftz = f->ftz || gForceFTZ;
@@ -84,8 +66,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
Force64BitFPUPrecision();
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -167,28 +149,29 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
{
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
&gOutBuffer2[j])))
if ((error =
clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -25,35 +25,16 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source =
GetUnaryKernel(kernel_name, name, ParameterType::Float,
ParameterType::Int, ParameterType::Float, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
// One kernel wrapper per vector size index (VECTOR_SIZE_COUNT entries).
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
// Bundle of arguments that BuildKernelFn receives through its void* parameter.
// NOTE(review): in this listing the struct appears alongside BuildKernelInfo,
// which looks like its replacement -- confirm which one the file keeps.
struct BuildKernelInfo2
{
// Indexed by vector size index; BuildKernelFn passes the address of one entry
// to BuildKernel.
Kernels &kernels;
// Indexed the same way as `kernels`.
Programs &programs;
// Name of the builtin under test, forwarded to the kernel source generator.
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
// Job callback handed to ThreadPool_Do: builds the kernel(s) for the vector
// size selected by `job_id`. `p` points at the caller's build-info struct;
// `thread_id` is unused.
// NOTE(review): this listing interleaves two bodies (diff markers were lost).
// The first three statements use BuildKernelInfo2/BuildKernel; everything from
// the `BuildKernelInfo &info` declaration onwards uses BuildKernels. As written
// the second body is unreachable and redeclares `info` -- keep only one.
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
// Jobs are numbered from 0, so offset by the first vector size index in use.
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
// Source generator callback: produces the kernel source for one builtin at one
// vector size index via GetUnaryKernel (Float, Int, Float parameter types).
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetUnaryKernel(kernel_name, builtin, ParameterType::Float,
ParameterType::Int, ParameterType::Float,
vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
cl_ulong abs_cl_long(cl_long i)
@@ -68,7 +49,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError = 0.0f;
int64_t maxError2 = 0;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -89,8 +71,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0;
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -172,28 +154,29 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
{
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
&gOutBuffer2[j])))
if ((error =
clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -24,34 +24,15 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Double,
ParameterType::ULong, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
// One kernel wrapper per vector size index (VECTOR_SIZE_COUNT entries).
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
// Bundle of arguments that BuildKernelFn receives through its void* parameter.
// NOTE(review): in this listing the struct appears alongside BuildKernelInfo,
// which looks like its replacement -- confirm which one the file keeps.
struct BuildKernelInfo2
{
// Indexed by vector size index; BuildKernelFn passes the address of one entry
// to BuildKernel.
Kernels &kernels;
// Indexed the same way as `kernels`.
Programs &programs;
// Name of the builtin under test, forwarded to the kernel source generator.
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
// Job callback handed to ThreadPool_Do: builds the kernel(s) for the vector
// size selected by `job_id`. `p` points at the caller's build-info struct;
// `thread_id` is unused.
// NOTE(review): this listing interleaves two bodies (diff markers were lost).
// The first three statements use BuildKernelInfo2/BuildKernel; everything from
// the `BuildKernelInfo &info` declaration onwards uses BuildKernels. As written
// the second body is unreachable and redeclares `info` -- keep only one.
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
// Jobs are numbered from 0, so offset by the first vector size index in use.
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
// Source generator callback: produces the kernel source for one builtin at one
// vector size index via GetUnaryKernel (Double and ULong parameter types).
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
ParameterType::ULong, vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
cl_ulong random64(MTdata d)
@@ -65,7 +46,8 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
Kernels kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
KernelMatrix kernels;
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ;
double maxErrorVal = 0.0f;
@@ -76,8 +58,8 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
Force64BitFPUPrecision();
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -132,22 +114,22 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
{
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;

View File

@@ -24,34 +24,15 @@
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Float,
ParameterType::UInt, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
// One kernel wrapper per vector size index (VECTOR_SIZE_COUNT entries).
using Kernels = std::array<clKernelWrapper, VECTOR_SIZE_COUNT>;
// Bundle of arguments that BuildKernelFn receives through its void* parameter.
// NOTE(review): in this listing the struct appears alongside BuildKernelInfo,
// which looks like its replacement -- confirm which one the file keeps.
struct BuildKernelInfo2
{
// Indexed by vector size index; BuildKernelFn passes the address of one entry
// to BuildKernel.
Kernels &kernels;
// Indexed the same way as `kernels`.
Programs &programs;
// Name of the builtin under test, forwarded to the kernel source generator.
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
// Job callback handed to ThreadPool_Do: builds the kernel(s) for the vector
// size selected by `job_id`. `p` points at the caller's build-info struct;
// `thread_id` is unused.
// NOTE(review): this listing interleaves two bodies (diff markers were lost).
// The first three statements use BuildKernelInfo2/BuildKernel; everything from
// the `BuildKernelInfo &info` declaration onwards uses BuildKernels. As written
// the second body is unreachable and redeclares `info` -- keep only one.
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
// Jobs are numbered from 0, so offset by the first vector size index in use.
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize,
&(info->kernels[vectorSize]),
&(info->programs[vectorSize]), info->relaxedMode);
BuildKernelInfo &info = *(BuildKernelInfo *)p;
// Source generator callback: produces the kernel source for one builtin at one
// vector size index via GetUnaryKernel (Float and UInt parameter types).
auto generator = [](const std::string &kernel_name, const char *builtin,
cl_uint vector_size_index) {
return GetUnaryKernel(kernel_name, builtin, ParameterType::Float,
ParameterType::UInt, vector_size_index);
};
return BuildKernels(info, job_id, generator);
}
} // anonymous namespace
@@ -60,7 +41,8 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
Kernels kernels;
KernelMatrix kernels;
const unsigned thread_id = 0; // Test is currently not multithreaded.
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
float maxErrorVal = 0.0f;
@@ -76,8 +58,8 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
float_ulps = f->float_ulps;
// Init the kernels
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -139,22 +121,22 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
{
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{
LogBuildError(programs[j]);
return error;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
&gInBuffer)))
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
sizeof(gInBuffer), &gInBuffer)))
{
LogBuildError(programs[j]);
return error;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
1, NULL, &localCount, NULL, 0,
NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
return error;