Fix ODR violations in math_brute_force (#1255)

A program having a type (such as ThreadInfo) defined differently in
multiple translation units exhibits undefined behaviour.

This commit fixes such issues in the math_brute_force component by
ensuring most types are local to their translation unit with the help of
anonymous namespaces. Later refactoring will be able to extract common
definitions to a single place.

This patch also removes unnecessary static and typedef keywords.
Otherwise, code is only moved around with no change.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
Marco Antognini
2021-05-21 10:07:54 +01:00
committed by GitHub
parent ce1687a408
commit ba9312e4a2
27 changed files with 2400 additions and 2321 deletions

View File

@@ -20,8 +20,10 @@
#include <cstring>
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
cl_kernel *k, cl_program *p, bool relaxedMode)
namespace {
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -99,7 +101,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
typedef struct BuildKernelInfo
struct BuildKernelInfo
{
cl_uint offset; // the first vector size to build
cl_uint kernel_count;
@@ -107,9 +109,9 @@ typedef struct BuildKernelInfo
cl_program *programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
} BuildKernelInfo;
};
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id;
@@ -118,16 +120,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
}
// Thread specific data for a worker thread
typedef struct ThreadInfo
struct ThreadInfo
{
cl_mem inBuf; // input buffer for the thread
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
float maxError; // max error value. Init to 0.
double maxErrorValue; // position of the max error value. Init to 0.
cl_command_queue tQueue; // per thread command queue to improve performance
} ThreadInfo;
};
typedef struct TestInfo
struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
@@ -149,200 +151,9 @@ typedef struct TestInfo
float half_sin_cos_tan_limit;
bool relaxedMode; // True if test is running in relaxed mode, false
// otherwise.
} TestInfo;
};
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{
TestInfo test_info;
cl_int error;
float maxError = 0.0f;
double maxErrorVal = 0.0;
int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
// Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_float));
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
// there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f;
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
test_info.ftz =
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
test_info.relaxedMode = relaxedMode;
// cl_kernels aren't thread safe, so we make one for each vector size for
// every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
}
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
i * test_info.subBufferSize * sizeof(cl_float),
test_info.subBufferSize * sizeof(cl_float)
};
test_info.tinfo[i].inBuf =
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
if (error || NULL == test_info.tinfo[i].inBuf)
{
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
"region {%zd, %zd}\n",
region.origin, region.size);
goto exit;
}
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
&region, &error);
if (error || NULL == test_info.tinfo[i].outBuf[j])
{
vlog_error("Error: Unable to create sub-buffer of "
"gOutBuffer[%d] for region {%zd, %zd}\n",
(int)j, region.origin, region.size);
goto exit;
}
}
test_info.tinfo[i].tQueue =
clCreateCommandQueue(gContext, gDevice, 0, &error);
if (NULL == test_info.tinfo[i].tQueue || error)
{
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
goto exit;
}
}
// Check for special cases for unary float
test_info.isRangeLimited = 0;
test_info.half_sin_cos_tan_limit = 0;
if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
{
test_info.isRangeLimited = 1;
test_info.half_sin_cos_tan_limit = 1.0f
+ test_info.ulps
* (FLT_EPSILON / 2.0f); // out of range results from finite
// inputs must be in [-1,1]
}
else if (0 == strcmp(f->name, "half_tan"))
{
test_info.isRangeLimited = 1;
test_info.half_sin_cos_tan_limit =
INFINITY; // out of range resut from finite inputs must be numeric
}
// Init the kernels
{
BuildKernelInfo build_info = {
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
test_info.programs, f->nameInCode, relaxedMode
};
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
goto exit;
}
// Run the kernels
if (!gSkipCorrectnessTesting || skipTestingRelaxed)
{
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
// Accumulate the arithmetic errors
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
if (test_info.tinfo[i].maxError > maxError)
{
maxError = test_info.tinfo[i].maxError;
maxErrorVal = test_info.tinfo[i].maxErrorValue;
}
}
if (error) goto exit;
if (gWimpyMode)
vlog("Wimp pass");
else
vlog("passed");
if (skipTestingRelaxed)
{
vlog(" (rlx skip correctness testing)\n");
goto exit;
}
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
}
vlog("\n");
exit:
// Release
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{
clReleaseProgram(test_info.programs[i]);
if (test_info.k[i])
{
for (cl_uint j = 0; j < test_info.threadCount; j++)
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
}
}
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
clReleaseMemObject(test_info.tinfo[i].inBuf);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo);
}
return error;
}
static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
const TestInfo *job = (const TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
@@ -725,3 +536,194 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
return CL_SUCCESS;
}
} // anonymous namespace
int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{
TestInfo test_info;
cl_int error;
float maxError = 0.0f;
double maxErrorVal = 0.0;
int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
// Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_float));
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
// there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f;
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
test_info.ftz =
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
test_info.relaxedMode = relaxedMode;
// cl_kernels aren't thread safe, so we make one for each vector size for
// every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
}
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
i * test_info.subBufferSize * sizeof(cl_float),
test_info.subBufferSize * sizeof(cl_float)
};
test_info.tinfo[i].inBuf =
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
if (error || NULL == test_info.tinfo[i].inBuf)
{
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
"region {%zd, %zd}\n",
region.origin, region.size);
goto exit;
}
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
&region, &error);
if (error || NULL == test_info.tinfo[i].outBuf[j])
{
vlog_error("Error: Unable to create sub-buffer of "
"gOutBuffer[%d] for region {%zd, %zd}\n",
(int)j, region.origin, region.size);
goto exit;
}
}
test_info.tinfo[i].tQueue =
clCreateCommandQueue(gContext, gDevice, 0, &error);
if (NULL == test_info.tinfo[i].tQueue || error)
{
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
goto exit;
}
}
// Check for special cases for unary float
test_info.isRangeLimited = 0;
test_info.half_sin_cos_tan_limit = 0;
if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
{
test_info.isRangeLimited = 1;
test_info.half_sin_cos_tan_limit = 1.0f
+ test_info.ulps
* (FLT_EPSILON / 2.0f); // out of range results from finite
// inputs must be in [-1,1]
}
else if (0 == strcmp(f->name, "half_tan"))
{
test_info.isRangeLimited = 1;
test_info.half_sin_cos_tan_limit =
INFINITY; // out of range resut from finite inputs must be numeric
}
// Init the kernels
{
BuildKernelInfo build_info = {
gMinVectorSizeIndex, test_info.threadCount, test_info.k,
test_info.programs, f->nameInCode, relaxedMode
};
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
goto exit;
}
// Run the kernels
if (!gSkipCorrectnessTesting || skipTestingRelaxed)
{
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
// Accumulate the arithmetic errors
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
if (test_info.tinfo[i].maxError > maxError)
{
maxError = test_info.tinfo[i].maxError;
maxErrorVal = test_info.tinfo[i].maxErrorValue;
}
}
if (error) goto exit;
if (gWimpyMode)
vlog("Wimp pass");
else
vlog("passed");
if (skipTestingRelaxed)
{
vlog(" (rlx skip correctness testing)\n");
goto exit;
}
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
}
vlog("\n");
exit:
// Release
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{
clReleaseProgram(test_info.programs[i]);
if (test_info.k[i])
{
for (cl_uint j = 0; j < test_info.threadCount; j++)
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
}
}
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
clReleaseMemObject(test_info.tinfo[i].inBuf);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo);
}
return error;
}