mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Fix ODR violations in math_brute_force (#1255)
A program having a type (such as ThreadInfo) defined differently in multiple translation units exhibits undefined behaviour. This commit fixes such issues in the math_brute_force component by ensuring most types are local to their translation unit with the help of anonymous namespaces. Later refactoring will be able to extract common definitions to a single place. This patch also removes unnecessary static and typedef keywords. Otherwise, code is only moved around with no change. Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
@@ -20,10 +20,12 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -107,7 +109,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -115,9 +117,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -126,7 +128,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem inBuf2; // input buffer for the thread
|
||||
@@ -138,9 +140,9 @@ typedef struct ThreadInfo
|
||||
// to 0.
|
||||
MTdata d;
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -162,10 +164,10 @@ typedef struct TestInfo
|
||||
int isNextafter;
|
||||
bool relaxedMode; // True if test is running in relaxed mode, false
|
||||
// otherwise.
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValues[] = {
|
||||
const float specialValues[] = {
|
||||
-NAN,
|
||||
-INFINITY,
|
||||
-FLT_MAX,
|
||||
@@ -267,196 +269,10 @@ static const float specialValues[] = {
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
static const size_t specialValuesCount =
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
|
||||
test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
|
||||
test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
&region, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -986,3 +802,189 @@ exit:
|
||||
if (overflow) free(overflow);
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
|
||||
test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
|
||||
test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
&region, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user