mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-20 14:39:01 +00:00
Fix ODR violations in math_brute_force (#1255)
A program having a type (such as ThreadInfo) defined differently in multiple translation units exhibits undefined behaviour. This commit fixes such issues in the math_brute_force component by ensuring most types are local to their translation unit with the help of anonymous namespaces. Later refactoring will be able to extract common definitions to a single place. This patch also removes unnecessary static and typedef keywords. Otherwise, code is only moved around with no change. Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
@@ -20,8 +20,10 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
@@ -99,7 +101,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
cl_uint offset; // the first vector size to build
|
||||
cl_uint kernel_count;
|
||||
@@ -107,9 +109,9 @@ typedef struct BuildKernelInfo
|
||||
cl_program *programs;
|
||||
const char *nameInCode;
|
||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||
} BuildKernelInfo;
|
||||
};
|
||||
|
||||
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||
cl_uint i = info->offset + job_id;
|
||||
@@ -118,16 +120,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
struct ThreadInfo
|
||||
{
|
||||
cl_mem inBuf; // input buffer for the thread
|
||||
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
float maxError; // max error value. Init to 0.
|
||||
double maxErrorValue; // position of the max error value. Init to 0.
|
||||
cl_command_queue tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
};
|
||||
|
||||
typedef struct TestInfo
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
@@ -149,200 +151,9 @@ typedef struct TestInfo
|
||||
float half_sin_cos_tan_limit;
|
||||
bool relaxedMode; // True if test is running in relaxed mode, false
|
||||
// otherwise.
|
||||
} TestInfo;
|
||||
};
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
|
||||
|
||||
int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
&region, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for special cases for unary float
|
||||
test_info.isRangeLimited = 0;
|
||||
test_info.half_sin_cos_tan_limit = 0;
|
||||
if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
|
||||
{
|
||||
test_info.isRangeLimited = 1;
|
||||
test_info.half_sin_cos_tan_limit = 1.0f
|
||||
+ test_info.ulps
|
||||
* (FLT_EPSILON / 2.0f); // out of range results from finite
|
||||
// inputs must be in [-1,1]
|
||||
}
|
||||
else if (0 == strcmp(f->name, "half_tan"))
|
||||
{
|
||||
test_info.isRangeLimited = 1;
|
||||
test_info.half_sin_cos_tan_limit =
|
||||
INFINITY; // out of range result from finite inputs must be numeric
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting || skipTestingRelaxed)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
if (skipTestingRelaxed)
|
||||
{
|
||||
vlog(" (rlx skip correctness testing)\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
const TestInfo *job = (const TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
@@ -725,3 +536,194 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info, 0, sizeof(test_info));
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_float));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
if (NULL == test_info.k[i])
|
||||
{
|
||||
vlog_error("Error: Unable to allocate storage for kernels!\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.k[i], 0, array_size);
|
||||
}
|
||||
test_info.tinfo =
|
||||
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
if (NULL == test_info.tinfo)
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to allocate storage for thread specific data.\n");
|
||||
error = CL_OUT_OF_HOST_MEMORY;
|
||||
goto exit;
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
test_info.subBufferSize * sizeof(cl_float)
|
||||
};
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for special cases for unary float
|
||||
test_info.isRangeLimited = 0;
|
||||
test_info.half_sin_cos_tan_limit = 0;
|
||||
if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
|
||||
{
|
||||
test_info.isRangeLimited = 1;
|
||||
test_info.half_sin_cos_tan_limit = 1.0f
|
||||
+ test_info.ulps
|
||||
* (FLT_EPSILON / 2.0f); // out of range results from finite
|
||||
// inputs must be in [-1,1]
|
||||
}
|
||||
else if (0 == strcmp(f->name, "half_tan"))
|
||||
{
|
||||
test_info.isRangeLimited = 1;
|
||||
test_info.half_sin_cos_tan_limit =
|
||||
INFINITY; // out of range resut from finite inputs must be numeric
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = {
|
||||
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode, relaxedMode
|
||||
};
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting || skipTestingRelaxed)
|
||||
{
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (error) goto exit;
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
if (skipTestingRelaxed)
|
||||
{
|
||||
vlog(" (rlx skip correctness testing)\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
}
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
free(test_info.tinfo);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user