Reduce difference between files in math_brute_force (#1138)

* Reduce differences between files

This will help reduce code duplication in future commits.

Some code is moved around, some variables are renamed and some
statements are slightly altered to reduce differences between files in
math_brute_force, yet the semantics remains the same.

The differences were identified using n-way diffs. Many differences
remain, however.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>

* Workaround clang-format limitation

Introduces some insignificant spaces to force clang-format to reduce the
indentation and reduce differences between files.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
Marco Antognini
2021-02-10 10:38:31 +00:00
committed by GitHub
parent f6b501352d
commit 8ad1088af9
13 changed files with 483 additions and 423 deletions

View File

@@ -44,10 +44,11 @@ static int BuildKernel(const char *name, const char *operator_symbol,
"* in2 )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" out[i] = in1[i] ",
" out[i] = in1[i] ",
operator_symbol,
" in2[i];\n"
"}\n" };
const char *c3[] = {
"__kernel void ",
name,
@@ -70,7 +71,8 @@ static int BuildKernel(const char *name, const char *operator_symbol,
" size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n"
" float3 f0, f1;\n"
" float3 f0;\n"
" float3 f1;\n"
" switch( parity )\n"
" {\n"
" case 1:\n"
@@ -137,8 +139,9 @@ static int BuildKernelDouble(const char *name, const char *operator_symbol,
operator_symbol,
" in2[i];\n"
"}\n" };
const char *c3[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void ",
name,
"_kernel",
@@ -160,7 +163,8 @@ static int BuildKernelDouble(const char *name, const char *operator_symbol,
" size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n"
" double3 d0, d1;\n"
" double3 d0;\n"
" double3 d1;\n"
" switch( parity )\n"
" {\n"
" case 1:\n"
@@ -235,43 +239,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
info->programs + i, info->relaxedMode);
}
// Thread specific data for a worker thread.
// One entry per worker lives in the TestInfo tinfo array; a worker picks its
// own entry by offsetting that array with its thread id.
typedef struct ThreadInfo
{
cl_mem inBuf; // input buffer for the thread (presumably operand 1 -- confirm)
cl_mem inBuf2; // input buffer for the thread (presumably operand 2 -- confirm)
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread, indexed by
                                  // vector size index
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
MTdata d; // per-thread random generator state, created with init_genrand()
cl_command_queue tQueue; // per thread command queue to improve performance;
                         // the worker maps outBuf[] through this queue
} ThreadInfo;
// Description of one test run, shared by all worker jobs.
// The test driver fills it in and passes its address to ThreadPool_Do(); each
// job then reads its settings (step, tinfo, f, ftz, relaxedMode) from it.
typedef struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info (name, func, rfunc)
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
// worker thread: k[vector_size][thread_id]
ThreadInfo *
tinfo; // An array of thread specific information for each worker thread
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs (passed to ThreadPool_Do)
cl_uint step; // step between each chunk and the next; computed as
              // subBufferSize * scale, and a job's base is job_id * step
cl_uint scale; // stride between individual test values
float ulps; // max_allowed ulps
int ftz; // non-zero if running in flush to zero mode
bool relaxedMode; // True if the test is being run in relaxed mode, false
// otherwise. When true the worker uses f->rfunc instead of
// f->func as the reference function.
// no special fields
} TestInfo;
// A table of more difficult cases to get right
static const float specialValuesFloat[] = {
-NAN,
@@ -375,9 +342,46 @@ static const float specialValuesFloat[] = {
+0.0f
};
static size_t specialValuesFloatCount =
static const size_t specialValuesFloatCount =
sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
// Thread specific data for a worker thread.
// One entry per worker lives in the TestInfo tinfo array; a worker picks its
// own entry by offsetting that array with its thread id.
typedef struct ThreadInfo
{
cl_mem inBuf; // input buffer for the thread (presumably operand 1 -- confirm)
cl_mem inBuf2; // input buffer for the thread (presumably operand 2 -- confirm)
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread, indexed by
                                  // vector size index
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
MTdata d; // per-thread random generator state, created with init_genrand()
cl_command_queue tQueue; // per thread command queue to improve performance;
                         // the worker maps outBuf[] through this queue
} ThreadInfo;
// Description of one test run, shared by all worker jobs.
// The test driver fills it in and passes its address to ThreadPool_Do(); each
// job then reads its settings (step, tinfo, f, ftz, relaxedMode) from it.
typedef struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info (name, func, rfunc)
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
// worker thread: k[vector_size][thread_id]
ThreadInfo *
tinfo; // An array of thread specific information for each worker thread
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs (passed to ThreadPool_Do)
cl_uint step; // step between each chunk and the next; computed as
              // subBufferSize * scale, and a job's base is job_id * step
cl_uint scale; // stride between individual test values
float ulps; // max_allowed ulps
int ftz; // non-zero if running in flush to zero mode
bool relaxedMode; // True if the test is being run in relaxed mode, false
// otherwise. When true the worker uses f->rfunc instead of
// f->func as the reference function.
// no special fields
} TestInfo;
static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *p);
int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
@@ -398,6 +402,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode)
{
test_info.subBufferSize = gWimpyBufferSize
@@ -405,7 +410,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
* RoundUpToNextPowerOfTwo(test_info.threadCount));
}
test_info.step = test_info.subBufferSize * test_info.scale;
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
// there was overflow
@@ -481,8 +486,8 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
&region, &error);
if (error || NULL == test_info.tinfo[i].outBuf[j])
{
vlog_error("Error: Unable to create sub-buffer of gInBuffer "
"for region {%zd, %zd}\n",
vlog_error("Error: Unable to create sub-buffer of "
"gInBuffer for region {%zd, %zd}\n",
region.origin, region.size);
goto exit;
}
@@ -513,6 +518,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
goto exit;
}
// Run the kernels
if (!gSkipCorrectnessTesting)
{
error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info);
@@ -536,7 +542,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
vlog("passed");
}
if (gMeasureTimes)
{
// Init input arrays
@@ -554,6 +559,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error;
}
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
{
@@ -626,7 +632,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
vlog("\n");
exit:
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{
@@ -665,30 +670,31 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id;
fptr func = job->f->func;
int ftz = job->ftz;
bool relaxedMode = job->relaxedMode;
float ulps = getAllowedUlpError(job->f, relaxedMode);
if (relaxedMode)
{
func = job->f->rfunc;
}
int ftz = job->ftz;
MTdata d = tinfo->d;
cl_uint j, k;
cl_int error;
cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
const char *name = job->f->name;
cl_uint *t;
cl_float *r, *s, *s2;
cl_uint *t = 0;
cl_float *r = 0;
cl_float *s = 0;
cl_float *s2 = 0;
RoundingMode oldRoundMode;
if (relaxedMode)
{
func = job->f->rfunc;
}
// start the map of the output arrays
cl_event e[VECTOR_SIZE_COUNT];
cl_uint *out[VECTOR_SIZE_COUNT];
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{
out[j] = (uint32_t *)clEnqueueMapBuffer(
out[j] = (cl_uint *)clEnqueueMapBuffer(
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
buffer_size, 0, NULL, e + j, &error);
if (error || NULL == out[j])
@@ -711,7 +717,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
specialValuesFloatCount * specialValuesFloatCount;
int indx = (totalSpecialValueCount - 1) / buffer_elements;
if (job_id <= (cl_uint)indx)
{
// Insert special values
@@ -877,7 +882,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
// an in order queue.
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
{
out[j] = (uint32_t *)clEnqueueMapBuffer(
out[j] = (cl_uint *)clEnqueueMapBuffer(
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
buffer_size, 0, NULL, NULL, &error);
if (error || NULL == out[j])
@@ -889,9 +894,9 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
}
// Wait for the last buffer
out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
CL_TRUE, CL_MAP_READ, 0,
buffer_size, 0, NULL, NULL, &error);
out[j] = (cl_uint *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
CL_TRUE, CL_MAP_READ, 0, buffer_size,
0, NULL, NULL, &error);
if (error || NULL == out[j])
{
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
@@ -1136,6 +1141,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
}
fflush(stdout);
}
exit:
if (overflow) free(overflow);
return error;
@@ -1267,6 +1273,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
float maxError = 0.0f;
double maxErrorVal = 0.0;
double maxErrorVal2 = 0.0;
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init test_info
@@ -1275,6 +1282,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_double));
if (gWimpyMode)
{
test_info.subBufferSize = gWimpyBufferSize
@@ -1373,7 +1381,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
}
// Init the kernels
{
BuildKernelInfo build_info = { gMinVectorSizeIndex,
@@ -1389,6 +1396,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
goto exit;
}
// Run the kernels
if (!gSkipCorrectnessTesting)
{
error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -1412,7 +1420,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
vlog("passed");
}
if (gMeasureTimes)
{
// Init input arrays
@@ -1503,7 +1510,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
vlog("\n");
exit:
// Release
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -1551,7 +1557,9 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
cl_int error;
const char *name = job->f->name;
cl_ulong *t;
cl_double *r, *s, *s2;
cl_double *r;
cl_double *s;
cl_double *s2;
Force64BitFPUPrecision();