mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Fix ODR violations in math_brute_force (#1255)
A program having a type (such as ThreadInfo) defined differently in multiple translation units exhibits undefined behaviour. This commit fixes such issues in the math_brute_force component by ensuring most types are local to their translation unit with the help of anonymous namespaces. Later refactoring will be able to extract common definitions to a single place. This patch also removes unnecessary static and typedef keywords. Otherwise, code is only moved around with no change. Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
@@ -20,10 +20,12 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -109,7 +111,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -117,9 +119,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -128,7 +130,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem inBuf2; // input buffer for the thread
|
||||
@@ -140,9 +142,9 @@ typedef struct ThreadInfo
|
||||
// to 0.
|
||||
MTdata d;
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -164,10 +166,10 @@ typedef struct TestInfo
|
||||
int isNextafter;
|
||||
bool relaxedMode; // True if test is running in relaxed mode, false
|
||||
// otherwise.
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const double specialValues[] = {
|
||||
const double specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-DBL_MAX,
|
||||
@@ -277,195 +279,10 @@ static const double specialValues[] = {
|
||||
+0.0,
|
||||
};
|
||||
|
||||
static size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
|
||||
test_info.skipNanInf = 0;
|
||||
test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -825,3 +642,188 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
exit:
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
|
||||
test_info.skipNanInf = 0;
|
||||
test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -20,10 +20,12 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -107,7 +109,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -115,9 +117,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -126,7 +128,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem inBuf2; // input buffer for the thread
|
||||
@@ -138,9 +140,9 @@ typedef struct ThreadInfo
|
||||
// to 0.
|
||||
MTdata d;
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -162,10 +164,10 @@ typedef struct TestInfo
|
||||
int isNextafter;
|
||||
bool relaxedMode; // True if test is running in relaxed mode, false
|
||||
// otherwise.
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValues[] = {
|
||||
const float specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-FLT_MAX,
|
||||
@@ -267,196 +269,10 @@ static const float specialValues[] = {
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
static const size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
|
||||
test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
|
||||
test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -986,3 +802,189 @@ exit:
|
||||
if (overflow) free(overflow);
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
|
||||
test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
|
||||
test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -21,8 +21,10 @@
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -108,7 +110,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -116,9 +118,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -127,7 +129,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem inBuf2; // input buffer for the thread
|
||||
@@ -139,9 +141,9 @@ typedef struct ThreadInfo
|
||||
// to 0.
|
||||
MTdata d;
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -159,10 +161,10 @@ typedef struct TestInfo
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
|
||||
// no special values
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const double specialValues[] = {
|
||||
const double specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-DBL_MAX,
|
||||
@@ -272,201 +274,18 @@ static const double specialValues[] = {
|
||||
+0.0,
|
||||
};
|
||||
|
||||
static size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
static const int specialValuesInt[] = {
|
||||
const int specialValuesInt[] = {
|
||||
0, 1, 2, 3, 1022, 1023, 1024, INT_MIN,
|
||||
INT_MAX, -1, -2, -3, -1022, -1023, -11024, -INT_MAX,
|
||||
};
|
||||
static constexpr size_t specialValuesIntCount =
|
||||
|
||||
constexpr size_t specialValuesIntCount =
|
||||
sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
cl_int maxErrorVal2 = 0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
cl_buffer_region region2 = { i * test_info.subBufferSize
|
||||
* sizeof(cl_int),
|
||||
test_info.subBufferSize * sizeof(cl_int) };
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -744,3 +563,187 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
exit:
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
cl_int maxErrorVal2 = 0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
cl_buffer_region region2 = { i * test_info.subBufferSize
|
||||
* sizeof(cl_int),
|
||||
test_info.subBufferSize * sizeof(cl_int) };
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -21,8 +21,10 @@
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -106,7 +108,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -114,9 +116,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -125,7 +127,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem inBuf2; // input buffer for the thread
|
||||
@@ -137,9 +139,9 @@ typedef struct ThreadInfo
|
||||
// to 0.
|
||||
MTdata d;
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -157,10 +159,10 @@ typedef struct TestInfo
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
|
||||
// no special values
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValues[] = {
|
||||
const float specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-FLT_MAX,
|
||||
@@ -262,204 +264,20 @@ static const float specialValues[] = {
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
static const size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
static const int specialValuesInt[] = {
|
||||
const int specialValuesInt[] = {
|
||||
0, 1, 2, 3, 126, 127,
|
||||
128, 0x02000001, 0x04000001, 1465264071, 1488522147, -1,
|
||||
-2, -3, -126, -127, -128, -0x02000001,
|
||||
-0x04000001, -1465264071, -1488522147,
|
||||
};
|
||||
static size_t specialValuesIntCount =
|
||||
|
||||
constexpr size_t specialValuesIntCount =
|
||||
sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
cl_int maxErrorVal2 = 0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
cl_buffer_region region2 = { i * test_info.subBufferSize
|
||||
* sizeof(cl_int),
|
||||
test_info.subBufferSize * sizeof(cl_int) };
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -738,3 +556,188 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
exit:
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
cl_int maxErrorVal2 = 0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
cl_buffer_region region2 = { i * test_info.subBufferSize
|
||||
* sizeof(cl_int),
|
||||
test_info.subBufferSize * sizeof(cl_int) };
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -20,9 +20,11 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *operator_symbol, int vectorSize,
|
||||
cl_uint kernel_count, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *operator_symbol, int vectorSize,
|
||||
cl_uint kernel_count, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -108,7 +110,7 @@ static int BuildKernel(const char *operator_symbol, int vectorSize,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -116,9 +118,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *operator_symbol;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -127,7 +129,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem inBuf2; // input buffer for the thread
|
||||
@@ -139,9 +141,9 @@ typedef struct ThreadInfo
|
||||
// to 0.
|
||||
MTdata d;
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -161,10 +163,10 @@ typedef struct TestInfo
|
||||
// otherwise.
|
||||
|
||||
// no special fields
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const double specialValues[] = {
|
||||
const double specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-DBL_MAX,
|
||||
@@ -274,192 +276,10 @@ static const double specialValues[] = {
|
||||
+0.0,
|
||||
};
|
||||
|
||||
static const size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
||||
bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -793,3 +613,185 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
exit:
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
||||
bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -20,9 +20,11 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *operator_symbol, int vectorSize,
|
||||
cl_uint kernel_count, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *operator_symbol, int vectorSize,
|
||||
cl_uint kernel_count, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -106,7 +108,7 @@ static int BuildKernel(const char *operator_symbol, int vectorSize,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -114,9 +116,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *operator_symbol;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -125,7 +127,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem inBuf2; // input buffer for the thread
|
||||
@@ -137,9 +139,9 @@ typedef struct ThreadInfo
|
||||
// to 0.
|
||||
MTdata d;
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -159,10 +161,10 @@ typedef struct TestInfo
|
||||
// otherwise.
|
||||
|
||||
// no special fields
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValues[] = {
|
||||
const float specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-FLT_MAX,
|
||||
@@ -264,194 +266,10 @@ static const float specialValues[] = {
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
static const size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
||||
bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -923,3 +741,187 @@ exit:
|
||||
if (overflow) free(overflow);
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
||||
bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -21,8 +21,10 @@
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -115,16 +117,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -132,7 +134,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct ComputeReferenceInfoD_
|
||||
struct ComputeReferenceInfoD
|
||||
{
|
||||
const double *x;
|
||||
const double *y;
|
||||
@@ -141,9 +143,9 @@ typedef struct ComputeReferenceInfoD_
|
||||
long double (*f_ffpI)(long double, long double, int *);
|
||||
cl_uint lim;
|
||||
cl_uint count;
|
||||
} ComputeReferenceInfoD;
|
||||
};
|
||||
|
||||
static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
{
|
||||
ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo;
|
||||
cl_uint lim = cri->lim;
|
||||
@@ -165,6 +167,8 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -21,8 +21,10 @@
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -113,16 +115,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -130,7 +132,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct ComputeReferenceInfoF_
|
||||
struct ComputeReferenceInfoF
|
||||
{
|
||||
const float *x;
|
||||
const float *y;
|
||||
@@ -139,9 +141,9 @@ typedef struct ComputeReferenceInfoF_
|
||||
double (*f_ffpI)(double, double, int *);
|
||||
cl_uint lim;
|
||||
cl_uint count;
|
||||
} ComputeReferenceInfoF;
|
||||
};
|
||||
|
||||
static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
{
|
||||
ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
|
||||
cl_uint lim = cri->lim;
|
||||
@@ -161,6 +163,8 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
|
||||
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
||||
}
|
||||
|
||||
#define unaryF NULL
|
||||
#define i_unaryF NULL
|
||||
#define unaryF_u NULL
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -100,16 +102,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -117,6 +119,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -98,16 +100,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -115,6 +117,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -107,7 +109,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -115,9 +117,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -126,16 +128,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem inBuf2; // input buffer for the thread
|
||||
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
MTdata d;
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -150,11 +152,10 @@ typedef struct TestInfo
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const double specialValues[] = {
|
||||
const double specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-DBL_MAX,
|
||||
@@ -264,174 +265,10 @@ static const double specialValues[] = {
|
||||
+0.0,
|
||||
};
|
||||
|
||||
static const size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (size_t i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -735,3 +572,167 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
exit:
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (size_t i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -105,7 +107,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -113,9 +115,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -124,16 +126,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem inBuf2; // input buffer for the thread
|
||||
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
MTdata d;
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -148,11 +150,10 @@ typedef struct TestInfo
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValues[] = {
|
||||
const float specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-FLT_MAX,
|
||||
@@ -254,175 +255,10 @@ static const float specialValues[] = {
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
static const size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -724,3 +560,168 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
exit:
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -101,7 +103,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -109,9 +111,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -120,14 +122,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -142,160 +144,9 @@ typedef struct TestInfo
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
};
|
||||
|
||||
} TestInfo;
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -506,3 +357,153 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -100,7 +102,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -108,9 +110,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -119,14 +121,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -141,161 +143,9 @@ typedef struct TestInfo
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
};
|
||||
|
||||
} TestInfo;
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -521,3 +371,154 @@ exit:
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -113,16 +115,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -130,6 +132,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -111,16 +113,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -128,6 +130,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -23,8 +23,10 @@
|
||||
#define CORRECTLY_ROUNDED 0
|
||||
#define FLUSHED 1
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -116,16 +118,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -134,7 +136,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const double specialValues[] = {
|
||||
const double specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-DBL_MAX,
|
||||
@@ -202,9 +204,11 @@ static const double specialValues[] = {
|
||||
+0.0,
|
||||
};
|
||||
|
||||
static const size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||
bool relaxedMode)
|
||||
{
|
||||
|
||||
@@ -23,8 +23,10 @@
|
||||
#define CORRECTLY_ROUNDED 0
|
||||
#define FLUSHED 1
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -114,16 +116,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -132,7 +134,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValues[] = {
|
||||
const float specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-FLT_MAX,
|
||||
@@ -210,9 +212,11 @@ static const float specialValues[] = {
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
static const size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -101,7 +103,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -109,9 +111,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -120,16 +122,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
float maxError; // max error value. Init to 0.
|
||||
double maxErrorValue; // position of the max error value. Init to 0.
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -151,174 +153,9 @@ typedef struct TestInfo
|
||||
float half_sin_cos_tan_limit;
|
||||
bool relaxedMode; // True if test is running in relaxed mode, false
|
||||
// otherwise.
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -547,3 +384,168 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_double));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
test_info.subBufferSize * sizeof(cl_double)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -99,7 +101,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -107,9 +109,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -118,16 +120,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
float maxError; // max error value. Init to 0.
|
||||
double maxErrorValue; // position of the max error value. Init to 0.
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -149,200 +151,9 @@ typedef struct TestInfo
|
||||
float half_sin_cos_tan_limit;
|
||||
bool relaxedMode; // True if test is running in relaxed mode, false
|
||||
// otherwise.
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for special cases for unary float
|
||||
test_info.isRangeLimited = 0;
|
||||
test_info.half_sin_cos_tan_limit = 0;
|
||||
if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
|
||||
{
|
||||
test_info.isRangeLimited = 1;
|
||||
test_info.half_sin_cos_tan_limit = 1.0f
|
||||
+ test_info.ulps
|
||||
* (FLT_EPSILON / 2.0f); // out of range results from finite
|
||||
// inputs must be in [-1,1]
|
||||
}
|
||||
else if (0 == strcmp(f->name, "half_tan"))
|
||||
{
|
||||
test_info.isRangeLimited = 1;
|
||||
test_info.half_sin_cos_tan_limit =
|
||||
INFINITY; // out of range resut from finite inputs must be numeric
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting || skipTestingRelaxed)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
if (skipTestingRelaxed)
|
||||
{
|
||||
vlog(" (rlx skip correctness testing)\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -725,3 +536,194 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for special cases for unary float
|
||||
test_info.isRangeLimited = 0;
|
||||
test_info.half_sin_cos_tan_limit = 0;
|
||||
if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
|
||||
{
|
||||
test_info.isRangeLimited = 1;
|
||||
test_info.half_sin_cos_tan_limit = 1.0f
|
||||
+ test_info.ulps
|
||||
* (FLT_EPSILON / 2.0f); // out of range results from finite
|
||||
// inputs must be in [-1,1]
|
||||
}
|
||||
else if (0 == strcmp(f->name, "half_tan"))
|
||||
{
|
||||
test_info.isRangeLimited = 1;
|
||||
test_info.half_sin_cos_tan_limit =
|
||||
INFINITY; // out of range resut from finite inputs must be numeric
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting || skipTestingRelaxed)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
if (skipTestingRelaxed)
|
||||
{
|
||||
vlog(" (rlx skip correctness testing)\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -107,16 +109,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -124,6 +126,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -105,16 +107,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -122,6 +124,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -21,8 +21,10 @@
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -108,16 +110,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -125,12 +127,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
static cl_ulong abs_cl_long(cl_long i)
|
||||
cl_ulong abs_cl_long(cl_long i)
|
||||
{
|
||||
cl_long mask = i >> 63;
|
||||
return (i ^ mask) - mask;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -21,8 +21,10 @@
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -106,16 +108,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -123,12 +125,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
static cl_ulong abs_cl_long(cl_long i)
|
||||
cl_ulong abs_cl_long(cl_long i)
|
||||
{
|
||||
cl_long mask = i >> 63;
|
||||
return (i ^ mask) - mask;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
@@ -102,16 +104,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -119,11 +121,13 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
static cl_ulong random64(MTdata d)
|
||||
cl_ulong random64(MTdata d)
|
||||
{
|
||||
return (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -99,16 +101,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_kernel *kernels;
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -116,6 +118,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
info->programs + i, info->relaxedMode);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
Reference in New Issue
Block a user