Reduce difference between files in math_brute_force (#1138)

* Reduce differences between files

This will help reduce code duplication is future commits.

Some code is moved around, some variables are renamed and some
statements are slightly altered to reduce differences between files in
math_brute_force, yet the semantics remains the same.

The differences were identified using n-way diffs. Many differences
remain however.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>

* Workaround clang-format limitation

Introduces some insignificant spaces to force clang-format to reduce the
indentation and reduce differences between files.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
Marco Antognini
2021-02-10 10:38:31 +00:00
committed by GitHub
parent f6b501352d
commit 8ad1088af9
13 changed files with 483 additions and 423 deletions

View File

@@ -25,9 +25,6 @@ int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata,
int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata, int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata,
bool relaxedMode); bool relaxedMode);
const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
extern const vtbl _binary = { "binary", TestFunc_Float_Float_Float, extern const vtbl _binary = { "binary", TestFunc_Float_Float_Float,
TestFunc_Double_Double_Double }; TestFunc_Double_Double_Double };
@@ -36,6 +33,8 @@ extern const vtbl _binary_nextafter = {
TestFunc_Double_Double_Double_nextafter TestFunc_Double_Double_Double_nextafter
}; };
const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
cl_kernel *k, cl_program *p, bool relaxedMode) cl_kernel *k, cl_program *p, bool relaxedMode)
@@ -76,7 +75,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" float3 f0, f1;\n" " float3 f0;\n"
" float3 f1;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -163,7 +163,8 @@ static int BuildKernelDouble(const char *name, int vectorSize,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" double3 d0, d1;\n" " double3 d0;\n"
" double3 d1;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -208,6 +209,35 @@ static int BuildKernelDouble(const char *name, int vectorSize,
relaxedMode); relaxedMode);
} }
typedef struct BuildKernelInfo
{
cl_uint offset; // the first vector size to build
cl_uint kernel_count;
cl_kernel **kernels;
cl_program *programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
} BuildKernelInfo;
static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode);
}
static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id;
return BuildKernelDouble(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i,
info->relaxedMode);
}
// A table of more difficult cases to get right // A table of more difficult cases to get right
static const float specialValuesFloat[] = { static const float specialValuesFloat[] = {
-NAN, -NAN,
@@ -311,38 +341,9 @@ static const float specialValuesFloat[] = {
+0.0f +0.0f
}; };
static size_t specialValuesFloatCount = static const size_t specialValuesFloatCount =
sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
typedef struct BuildKernelInfo
{
cl_uint offset; // the first vector size to build
cl_uint kernel_count;
cl_kernel **kernels;
cl_program *programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
} BuildKernelInfo;
static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode);
}
static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id;
return BuildKernelDouble(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i,
info->relaxedMode);
}
// Thread specific data for a worker thread // Thread specific data for a worker thread
typedef struct ThreadInfo typedef struct ThreadInfo
{ {
@@ -426,11 +427,11 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d,
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
test_info.ftz = test_info.ftz =
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
test_info.relaxedMode = relaxedMode;
test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
test_info.isNextafter = isNextafter; test_info.isNextafter = isNextafter;
test_info.relaxedMode = relaxedMode;
// cl_kernels aren't thread safe, so we make one for each vector size for // cl_kernels aren't thread safe, so we make one for each vector size for
// every thread // every thread
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -543,12 +544,11 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d,
vlog("passed"); vlog("passed");
} }
if (gMeasureTimes) if (gMeasureTimes)
{ {
// Init input arrays // Init input arrays
uint32_t *p = (uint32_t *)gIn; cl_uint *p = (cl_uint *)gIn;
uint32_t *p2 = (uint32_t *)gIn2; cl_uint *p2 = (cl_uint *)gIn2;
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++) for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
{ {
p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000; p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000;
@@ -561,6 +561,7 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d,
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
BUFFER_SIZE, gIn2, 0, NULL, NULL))) BUFFER_SIZE, gIn2, 0, NULL, NULL)))
{ {
@@ -568,7 +569,6 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d,
return error; return error;
} }
// Run the kernels // Run the kernels
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
@@ -633,7 +633,6 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d,
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
vlog("\n"); vlog("\n");
exit: exit:
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
@@ -684,23 +683,21 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
int skipNanInf = job->skipNanInf; int skipNanInf = job->skipNanInf;
int isNextafter = job->isNextafter; int isNextafter = job->isNextafter;
cl_uint *t = 0; cl_uint *t = 0;
float *r = 0, *s = 0, *s2 = 0; cl_float *r = 0;
cl_float *s = 0;
cl_float *s2 = 0;
cl_int copysign_test = 0; cl_int copysign_test = 0;
RoundingMode oldRoundMode; RoundingMode oldRoundMode;
int skipVerification = 0; int skipVerification = 0;
if (relaxedMode) if (relaxedMode)
{ {
func = job->f->rfunc;
if (strcmp(name, "pow") == 0 && gFastRelaxedDerived) if (strcmp(name, "pow") == 0 && gFastRelaxedDerived)
{ {
func = job->f->rfunc;
ulps = INFINITY; ulps = INFINITY;
skipVerification = 1; skipVerification = 1;
} }
else
{
func = job->f->rfunc;
}
} }
// start the map of the output arrays // start the map of the output arrays
@@ -744,7 +741,8 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
{ {
fp[j] = specialValuesFloat[x]; fp[j] = specialValuesFloat[x];
fp2[j] = specialValuesFloat[y]; fp2[j] = specialValuesFloat[y];
if (++x >= specialValuesFloatCount) ++x;
if (x >= specialValuesFloatCount)
{ {
x = 0; x = 0;
y++; y++;
@@ -1203,13 +1201,11 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
fflush(stdout); fflush(stdout);
} }
exit: exit:
if (overflow) free(overflow); if (overflow) free(overflow);
return error; return error;
} }
// A table of more difficult cases to get right // A table of more difficult cases to get right
static const double specialValuesDouble[] = { static const double specialValuesDouble[] = {
-NAN, -NAN,
@@ -1444,10 +1440,10 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
vlog_error("clCreateCommandQueue failed. (%d)\n", error); vlog_error("clCreateCommandQueue failed. (%d)\n", error);
goto exit; goto exit;
} }
test_info.tinfo[i].d = init_genrand(genrand_int32(d)); test_info.tinfo[i].d = init_genrand(genrand_int32(d));
} }
// Init the kernels // Init the kernels
{ {
BuildKernelInfo build_info = { BuildKernelInfo build_info = {
@@ -1460,6 +1456,7 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
goto exit; goto exit;
} }
// Run the kernels
if (!gSkipCorrectnessTesting) if (!gSkipCorrectnessTesting)
{ {
error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info); error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -1500,6 +1497,7 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
BUFFER_SIZE, gIn2, 0, NULL, NULL))) BUFFER_SIZE, gIn2, 0, NULL, NULL)))
{ {
@@ -1507,7 +1505,6 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
return error; return error;
} }
// Run the kernels // Run the kernels
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
@@ -1573,7 +1570,6 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
vlog("\n"); vlog("\n");
exit: exit:
// Release // Release
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -1622,7 +1618,9 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
int isNextafter = job->isNextafter; int isNextafter = job->isNextafter;
cl_ulong *t; cl_ulong *t;
cl_double *r, *s, *s2; cl_double *r;
cl_double *s;
cl_double *s2;
Force64BitFPUPrecision(); Force64BitFPUPrecision();
@@ -1970,6 +1968,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
} }
fflush(stdout); fflush(stdout);
} }
exit: exit:
return error; return error;
} }

View File

@@ -44,10 +44,11 @@ static int BuildKernel(const char *name, const char *operator_symbol,
"* in2 )\n" "* in2 )\n"
"{\n" "{\n"
" size_t i = get_global_id(0);\n" " size_t i = get_global_id(0);\n"
" out[i] = in1[i] ", " out[i] = in1[i] ",
operator_symbol, operator_symbol,
" in2[i];\n" " in2[i];\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
"__kernel void ", "__kernel void ",
name, name,
@@ -70,7 +71,8 @@ static int BuildKernel(const char *name, const char *operator_symbol,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" float3 f0, f1;\n" " float3 f0;\n"
" float3 f1;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -137,8 +139,9 @@ static int BuildKernelDouble(const char *name, const char *operator_symbol,
operator_symbol, operator_symbol,
" in2[i];\n" " in2[i];\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void ", "__kernel void ",
name, name,
"_kernel", "_kernel",
@@ -160,7 +163,8 @@ static int BuildKernelDouble(const char *name, const char *operator_symbol,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" double3 d0, d1;\n" " double3 d0;\n"
" double3 d1;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -235,43 +239,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
info->programs + i, info->relaxedMode); info->programs + i, info->relaxedMode);
} }
// Thread specific data for a worker thread
typedef struct ThreadInfo
{
cl_mem inBuf; // input buffer for the thread
cl_mem inBuf2; // input buffer for the thread
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
MTdata d;
cl_command_queue tQueue; // per thread command queue to improve performance
} ThreadInfo;
typedef struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
// worker thread: k[vector_size][thread_id]
ThreadInfo *
tinfo; // An array of thread specific information for each worker thread
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values
float ulps; // max_allowed ulps
int ftz; // non-zero if running in flush to zero mode
bool relaxedMode; // True if the test is being run in relaxed mode, false
// otherwise.
// no special fields
} TestInfo;
// A table of more difficult cases to get right // A table of more difficult cases to get right
static const float specialValuesFloat[] = { static const float specialValuesFloat[] = {
-NAN, -NAN,
@@ -375,9 +342,46 @@ static const float specialValuesFloat[] = {
+0.0f +0.0f
}; };
static size_t specialValuesFloatCount = static const size_t specialValuesFloatCount =
sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
// Thread specific data for a worker thread
typedef struct ThreadInfo
{
cl_mem inBuf; // input buffer for the thread
cl_mem inBuf2; // input buffer for the thread
cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
MTdata d;
cl_command_queue tQueue; // per thread command queue to improve performance
} ThreadInfo;
typedef struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
// worker thread: k[vector_size][thread_id]
ThreadInfo *
tinfo; // An array of thread specific information for each worker thread
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values
float ulps; // max_allowed ulps
int ftz; // non-zero if running in flush to zero mode
bool relaxedMode; // True if the test is being run in relaxed mode, false
// otherwise.
// no special fields
} TestInfo;
static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *p); static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *p);
int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
@@ -398,6 +402,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_float)); test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode) if (gWimpyMode)
{ {
test_info.subBufferSize = gWimpyBufferSize test_info.subBufferSize = gWimpyBufferSize
@@ -405,7 +410,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
* RoundUpToNextPowerOfTwo(test_info.threadCount)); * RoundUpToNextPowerOfTwo(test_info.threadCount));
} }
test_info.step = test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale) if (test_info.step / test_info.subBufferSize != test_info.scale)
{ {
// there was overflow // there was overflow
@@ -481,8 +486,8 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
&region, &error); &region, &error);
if (error || NULL == test_info.tinfo[i].outBuf[j]) if (error || NULL == test_info.tinfo[i].outBuf[j])
{ {
vlog_error("Error: Unable to create sub-buffer of gInBuffer " vlog_error("Error: Unable to create sub-buffer of "
"for region {%zd, %zd}\n", "gInBuffer for region {%zd, %zd}\n",
region.origin, region.size); region.origin, region.size);
goto exit; goto exit;
} }
@@ -513,6 +518,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
goto exit; goto exit;
} }
// Run the kernels
if (!gSkipCorrectnessTesting) if (!gSkipCorrectnessTesting)
{ {
error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info); error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info);
@@ -536,7 +542,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
vlog("passed"); vlog("passed");
} }
if (gMeasureTimes) if (gMeasureTimes)
{ {
// Init input arrays // Init input arrays
@@ -554,6 +559,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
BUFFER_SIZE, gIn2, 0, NULL, NULL))) BUFFER_SIZE, gIn2, 0, NULL, NULL)))
{ {
@@ -626,7 +632,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
vlog("\n"); vlog("\n");
exit: exit:
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
@@ -665,30 +670,31 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = job->tinfo + thread_id;
fptr func = job->f->func; fptr func = job->f->func;
int ftz = job->ftz;
bool relaxedMode = job->relaxedMode; bool relaxedMode = job->relaxedMode;
float ulps = getAllowedUlpError(job->f, relaxedMode); float ulps = getAllowedUlpError(job->f, relaxedMode);
if (relaxedMode)
{
func = job->f->rfunc;
}
int ftz = job->ftz;
MTdata d = tinfo->d; MTdata d = tinfo->d;
cl_uint j, k; cl_uint j, k;
cl_int error; cl_int error;
cl_uchar *overflow = (cl_uchar *)malloc(buffer_size); cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
const char *name = job->f->name; const char *name = job->f->name;
cl_uint *t; cl_uint *t = 0;
cl_float *r, *s, *s2; cl_float *r = 0;
cl_float *s = 0;
cl_float *s2 = 0;
RoundingMode oldRoundMode; RoundingMode oldRoundMode;
if (relaxedMode)
{
func = job->f->rfunc;
}
// start the map of the output arrays // start the map of the output arrays
cl_event e[VECTOR_SIZE_COUNT]; cl_event e[VECTOR_SIZE_COUNT];
cl_uint *out[VECTOR_SIZE_COUNT]; cl_uint *out[VECTOR_SIZE_COUNT];
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
out[j] = (uint32_t *)clEnqueueMapBuffer( out[j] = (cl_uint *)clEnqueueMapBuffer(
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
buffer_size, 0, NULL, e + j, &error); buffer_size, 0, NULL, e + j, &error);
if (error || NULL == out[j]) if (error || NULL == out[j])
@@ -711,7 +717,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
specialValuesFloatCount * specialValuesFloatCount; specialValuesFloatCount * specialValuesFloatCount;
int indx = (totalSpecialValueCount - 1) / buffer_elements; int indx = (totalSpecialValueCount - 1) / buffer_elements;
if (job_id <= (cl_uint)indx) if (job_id <= (cl_uint)indx)
{ {
// Insert special values // Insert special values
@@ -877,7 +882,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
// an in order queue. // an in order queue.
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
{ {
out[j] = (uint32_t *)clEnqueueMapBuffer( out[j] = (cl_uint *)clEnqueueMapBuffer(
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
buffer_size, 0, NULL, NULL, &error); buffer_size, 0, NULL, NULL, &error);
if (error || NULL == out[j]) if (error || NULL == out[j])
@@ -889,9 +894,9 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
} }
// Wait for the last buffer // Wait for the last buffer
out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], out[j] = (cl_uint *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
CL_TRUE, CL_MAP_READ, 0, CL_TRUE, CL_MAP_READ, 0, buffer_size,
buffer_size, 0, NULL, NULL, &error); 0, NULL, NULL, &error);
if (error || NULL == out[j]) if (error || NULL == out[j])
{ {
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error); vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
@@ -1136,6 +1141,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
} }
fflush(stdout); fflush(stdout);
} }
exit: exit:
if (overflow) free(overflow); if (overflow) free(overflow);
return error; return error;
@@ -1267,6 +1273,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
float maxError = 0.0f; float maxError = 0.0f;
double maxErrorVal = 0.0; double maxErrorVal = 0.0;
double maxErrorVal2 = 0.0; double maxErrorVal2 = 0.0;
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init test_info // Init test_info
@@ -1275,6 +1282,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_double)); test_info.scale = getTestScale(sizeof(cl_double));
if (gWimpyMode) if (gWimpyMode)
{ {
test_info.subBufferSize = gWimpyBufferSize test_info.subBufferSize = gWimpyBufferSize
@@ -1373,7 +1381,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
test_info.tinfo[i].d = init_genrand(genrand_int32(d)); test_info.tinfo[i].d = init_genrand(genrand_int32(d));
} }
// Init the kernels // Init the kernels
{ {
BuildKernelInfo build_info = { gMinVectorSizeIndex, BuildKernelInfo build_info = { gMinVectorSizeIndex,
@@ -1389,6 +1396,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
goto exit; goto exit;
} }
// Run the kernels
if (!gSkipCorrectnessTesting) if (!gSkipCorrectnessTesting)
{ {
error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info); error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -1412,7 +1420,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
vlog("passed"); vlog("passed");
} }
if (gMeasureTimes) if (gMeasureTimes)
{ {
// Init input arrays // Init input arrays
@@ -1503,7 +1510,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
vlog("\n"); vlog("\n");
exit: exit:
// Release // Release
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -1551,7 +1557,9 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
cl_int error; cl_int error;
const char *name = job->f->name; const char *name = job->f->name;
cl_ulong *t; cl_ulong *t;
cl_double *r, *s, *s2; cl_double *r;
cl_double *s;
cl_double *s2;
Force64BitFPUPrecision(); Force64BitFPUPrecision();

View File

@@ -15,8 +15,8 @@
// //
#include "Utility.h" #include "Utility.h"
#include <string.h>
#include <limits.h> #include <limits.h>
#include <string.h>
#include "FunctionList.h" #include "FunctionList.h"
int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode); int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode);
@@ -228,7 +228,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
info->relaxedMode); info->relaxedMode);
} }
// A table of more difficult cases to get right // A table of more difficult cases to get right
static const float specialValuesFloat[] = { static const float specialValuesFloat[] = {
-NAN, -NAN,
@@ -331,9 +330,9 @@ static const float specialValuesFloat[] = {
MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
+0.0f +0.0f
}; };
static size_t specialValuesFloatCount =
sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
static const size_t specialValuesFloatCount =
sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
static const int specialValuesInt[] = { static const int specialValuesInt[] = {
0, 1, 2, 3, 126, 127, 0, 1, 2, 3, 126, 127,
@@ -484,8 +483,8 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
&region, &error); &region, &error);
if (error || NULL == test_info.tinfo[i].outBuf[j]) if (error || NULL == test_info.tinfo[i].outBuf[j])
{ {
vlog_error("Error: Unable to create sub-buffer of gInBuffer " vlog_error("Error: Unable to create sub-buffer of "
"for region {%zd, %zd}\n", "gInBuffer for region {%zd, %zd}\n",
region.origin, region.size); region.origin, region.size);
goto exit; goto exit;
} }
@@ -497,6 +496,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
vlog_error("clCreateCommandQueue failed. (%d)\n", error); vlog_error("clCreateCommandQueue failed. (%d)\n", error);
goto exit; goto exit;
} }
test_info.tinfo[i].d = init_genrand(genrand_int32(d)); test_info.tinfo[i].d = init_genrand(genrand_int32(d));
} }
@@ -537,7 +537,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
vlog("passed"); vlog("passed");
} }
if (gMeasureTimes) if (gMeasureTimes)
{ {
// Init input arrays // Init input arrays
@@ -555,6 +554,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
BUFFER_SIZE, gIn2, 0, NULL, NULL))) BUFFER_SIZE, gIn2, 0, NULL, NULL)))
{ {
@@ -627,7 +627,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
vlog("\n"); vlog("\n");
exit: exit:
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
@@ -658,7 +657,6 @@ exit:
return error; return error;
} }
static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; const TestInfo *job = (const TestInfo *)data;
@@ -666,23 +664,24 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
size_t buffer_size = buffer_elements * sizeof(cl_float); size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = job->tinfo + thread_id;
float ulps = job->ulps;
fptr func = job->f->func; fptr func = job->f->func;
int ftz = job->ftz; int ftz = job->ftz;
float ulps = job->ulps;
MTdata d = tinfo->d; MTdata d = tinfo->d;
cl_uint j, k; cl_uint j, k;
cl_int error; cl_int error;
const char *name = job->f->name; const char *name = job->f->name;
cl_uint *t; cl_uint *t = 0;
cl_float *r, *s; cl_float *r = 0;
cl_int *s2; cl_float *s = 0;
cl_int *s2 = 0;
// start the map of the output arrays // start the map of the output arrays
cl_event e[VECTOR_SIZE_COUNT]; cl_event e[VECTOR_SIZE_COUNT];
cl_uint *out[VECTOR_SIZE_COUNT]; cl_uint *out[VECTOR_SIZE_COUNT];
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
out[j] = (uint32_t *)clEnqueueMapBuffer( out[j] = (cl_uint *)clEnqueueMapBuffer(
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
buffer_size, 0, NULL, e + j, &error); buffer_size, 0, NULL, e + j, &error);
if (error || NULL == out[j]) if (error || NULL == out[j])
@@ -700,9 +699,11 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
j = 0; j = 0;
int totalSpecialValueCount = int totalSpecialValueCount =
specialValuesFloatCount * specialValuesIntCount; specialValuesFloatCount * specialValuesIntCount;
int indx = (totalSpecialValueCount - 1) / buffer_elements; int indx = (totalSpecialValueCount - 1) / buffer_elements;
if (job_id <= (cl_uint)indx) if (job_id <= (cl_uint)indx)
{ // test edge cases { // test edge cases
float *fp = (float *)p; float *fp = (float *)p;
@@ -716,7 +717,8 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
{ {
fp[j] = specialValuesFloat[x]; fp[j] = specialValuesFloat[x];
ip2[j] = specialValuesInt[y]; ip2[j] = specialValuesInt[y];
if (++x >= specialValuesFloatCount) ++x;
if (x >= specialValuesFloatCount)
{ {
x = 0; x = 0;
y++; y++;
@@ -820,7 +822,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
// an in order queue. // an in order queue.
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
{ {
out[j] = (uint32_t *)clEnqueueMapBuffer( out[j] = (cl_uint *)clEnqueueMapBuffer(
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
buffer_size, 0, NULL, NULL, &error); buffer_size, 0, NULL, NULL, &error);
if (error || NULL == out[j]) if (error || NULL == out[j])
@@ -832,9 +834,9 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
} }
// Wait for the last buffer // Wait for the last buffer
out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], out[j] = (cl_uint *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
CL_TRUE, CL_MAP_READ, 0, CL_TRUE, CL_MAP_READ, 0, buffer_size,
buffer_size, 0, NULL, NULL, &error); 0, NULL, NULL, &error);
if (error || NULL == out[j]) if (error || NULL == out[j])
{ {
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error); vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
@@ -1057,6 +1059,7 @@ static const double specialValuesDouble[] = {
MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
+0.0, +0.0,
}; };
static size_t specialValuesDoubleCount = static size_t specialValuesDoubleCount =
sizeof(specialValuesDouble) / sizeof(specialValuesDouble[0]); sizeof(specialValuesDouble) / sizeof(specialValuesDouble[0]);
@@ -1165,12 +1168,9 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
/* Qualcomm fix: 9461 read-write flags must be compatible with
* parent buffer */
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
&region, &error); &region, &error);
/* Qualcomm fix: end */
if (error || NULL == test_info.tinfo[i].outBuf[j]) if (error || NULL == test_info.tinfo[i].outBuf[j])
{ {
vlog_error("Error: Unable to create sub-buffer of gInBuffer " vlog_error("Error: Unable to create sub-buffer of gInBuffer "
@@ -1190,7 +1190,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
test_info.tinfo[i].d = init_genrand(genrand_int32(d)); test_info.tinfo[i].d = init_genrand(genrand_int32(d));
} }
// Init the kernels // Init the kernels
{ {
BuildKernelInfo build_info = { BuildKernelInfo build_info = {
@@ -1320,7 +1319,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
vlog("\n"); vlog("\n");
exit: exit:
// Release // Release
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -1367,7 +1365,8 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
cl_int error; cl_int error;
const char *name = job->f->name; const char *name = job->f->name;
cl_ulong *t; cl_ulong *t;
cl_double *r, *s; cl_double *r;
cl_double *s;
cl_int *s2; cl_int *s2;
Force64BitFPUPrecision(); Force64BitFPUPrecision();
@@ -1398,6 +1397,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
int totalSpecialValueCount = int totalSpecialValueCount =
specialValuesDoubleCount * specialValuesInt2Count; specialValuesDoubleCount * specialValuesInt2Count;
int indx = (totalSpecialValueCount - 1) / buffer_elements; int indx = (totalSpecialValueCount - 1) / buffer_elements;
if (job_id <= (cl_uint)indx) if (job_id <= (cl_uint)indx)
{ // test edge cases { // test edge cases
cl_double *fp = (cl_double *)p; cl_double *fp = (cl_double *)p;

View File

@@ -40,7 +40,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
sizeNames[vectorSize], sizeNames[vectorSize],
"* in1, __global float", "* in1, __global float",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in2)\n" "* in2 )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
@@ -71,7 +71,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" float3 f0, f1;\n" " float3 f0;\n"
" float3 f1;\n"
" int3 i0 = 0xdeaddead;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -83,7 +85,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
" f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
" break;\n" " break;\n"
" }\n" " }\n"
" int3 i0 = 0xdeaddead;\n"
" f0 = ", " f0 = ",
name, name,
"( f0, f1, &i0 );\n" "( f0, f1, &i0 );\n"
@@ -132,12 +133,12 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
sizeNames[vectorSize], sizeNames[vectorSize],
"* in1, __global double", "* in1, __global double",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in2)\n" "* in2 )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
name, name,
"( in1[i], in2[i], out2 + i );\n" "( in1[i], in2[i], out2[i] );\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
@@ -164,7 +165,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" double3 d0, d1;\n" " double3 d0;\n"
" double3 d1;\n"
" int3 i0 = 0xdeaddead;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -176,7 +179,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
" d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
" break;\n" " break;\n"
" }\n" " }\n"
" int3 i0 = 0xdeaddead;\n"
" d0 = ", " d0 = ",
name, name,
"( d0, d1, &i0 );\n" "( d0, d1, &i0 );\n"
@@ -309,20 +311,22 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
uint64_t i; uint64_t i;
uint32_t j, k; uint32_t j, k;
int error; int error;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
cl_program programs[VECTOR_SIZE_COUNT]; cl_program programs[VECTOR_SIZE_COUNT];
cl_kernel kernels[VECTOR_SIZE_COUNT]; cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f; float maxError = 0.0f;
float float_ulps;
int64_t maxError2 = 0;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
int64_t maxError2 = 0;
float maxErrorVal = 0.0f; float maxErrorVal = 0.0f;
float maxErrorVal2 = 0.0f; float maxErrorVal2 = 0.0f;
size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
uint64_t step = getTestStep(sizeof(float), bufferSize); uint64_t step = getTestStep(sizeof(float), bufferSize);
cl_uint threadCount = GetThreadCount(); cl_uint threadCount = GetThreadCount();
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
float float_ulps;
if (gIsEmbedded) if (gIsEmbedded)
float_ulps = f->float_embedded_ulps; float_ulps = f->float_embedded_ulps;
else else
@@ -485,7 +489,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{ {
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{ {
uint32_t *q = (uint32_t *)gOut[k]; uint32_t *q = (uint32_t *)(gOut[k]);
int32_t *q2 = (int32_t *)gOut2[k]; int32_t *q2 = (int32_t *)gOut2[k];
// Check for exact match to correctly rounded result // Check for exact match to correctly rounded result
@@ -695,9 +699,11 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
if (gMeasureTimes) if (gMeasureTimes)
{ {
// Init input array // Init input array
uint32_t *p = (uint32_t *)gIn; cl_uint *p = (cl_uint *)gIn;
for (j = 0; j < bufferSize / sizeof(float); j++) for (j = 0; j < bufferSize / sizeof(float); j++)
{
p[j] = genrand_int32(d); p[j] = genrand_int32(d);
}
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
bufferSize, gIn, 0, NULL, NULL))) bufferSize, gIn, 0, NULL, NULL)))
{ {
@@ -823,9 +829,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
if ((error = ThreadPool_Do(BuildKernel_DoubleFn, if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex, gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info))) &build_info)))
{
return error; return error;
}
} }
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
@@ -1185,7 +1189,6 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
{ {
vlog("."); vlog(".");
} }
fflush(stdout); fflush(stdout);
} }
} }
@@ -1202,7 +1205,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
{ {
// Init input array // Init input array
double *p = (double *)gIn; double *p = (double *)gIn;
for (j = 0; j < bufferSize / sizeof(double); j++) for (j = 0; j < bufferSize / sizeof(cl_double); j++)
p[j] = DoubleFromUInt32(genrand_int32(d)); p[j] = DoubleFromUInt32(genrand_int32(d));
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0,
bufferSize, gIn, 0, NULL, NULL))) bufferSize, gIn, 0, NULL, NULL)))

View File

@@ -42,6 +42,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
name, name,
"( in[i] );\n" "( in[i] );\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
"__kernel void math_kernel", "__kernel void math_kernel",
sizeNames[vectorSize], sizeNames[vectorSize],
@@ -87,7 +88,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
"}\n" "}\n"
}; };
const char **kern = c; const char **kern = c;
size_t kernSize = sizeof(c) / sizeof(c[0]); size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -114,7 +114,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out, __global double", "* out, __global double",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
@@ -177,7 +177,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
kernSize = sizeof(c3) / sizeof(c3[0]); kernSize = sizeof(c3) / sizeof(c3[0]);
} }
char testName[32]; char testName[32];
snprintf(testName, sizeof(testName) - 1, "math_kernel%s", snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
sizeNames[vectorSize]); sizeNames[vectorSize]);
@@ -219,7 +218,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
int error; int error;
cl_program programs[VECTOR_SIZE_COUNT]; cl_program programs[VECTOR_SIZE_COUNT];
cl_kernel kernels[VECTOR_SIZE_COUNT]; cl_kernel kernels[VECTOR_SIZE_COUNT];
int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
uint64_t step = getTestStep(sizeof(float), bufferSize); uint64_t step = getTestStep(sizeof(float), bufferSize);
int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(float)) + 1); int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(float)) + 1);
@@ -234,27 +233,30 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
Force64BitFPUPrecision(); Force64BitFPUPrecision();
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, {
f->nameInCode, relaxedMode }; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
if ((error = ThreadPool_Do(BuildKernel_FloatFn, f->nameInCode, relaxedMode };
gMaxVectorSizeIndex - gMinVectorSizeIndex, if ((error = ThreadPool_Do(BuildKernel_FloatFn,
&build_info))) gMaxVectorSizeIndex - gMinVectorSizeIndex,
return error; &build_info)))
return error;
}
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
{ {
// Init input array // Init input array
uint32_t *p = (uint32_t *)gIn; cl_uint *p = (cl_uint *)gIn;
if (gWimpyMode) if (gWimpyMode)
{ {
for (j = 0; j < bufferSize / sizeof(float); j++) for (j = 0; j < bufferSize / sizeof(float); j++)
p[j] = (uint32_t)i + j * scale; p[j] = (cl_uint)i + j * scale;
} }
else else
{ {
for (j = 0; j < bufferSize / sizeof(float); j++) for (j = 0; j < bufferSize / sizeof(float); j++)
p[j] = (uint32_t)i + j; p[j] = (uint32_t)i + j;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
bufferSize, gIn, 0, NULL, NULL))) bufferSize, gIn, 0, NULL, NULL)))
{ {
@@ -281,7 +283,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
size_t vectorSize = sizeValues[j] * sizeof(cl_float); size_t vectorSize = sizeValues[j] * sizeof(cl_float);
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; size_t localCount = (bufferSize + vectorSize - 1)
/ vectorSize; // bufferSize / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j]))) &gOutBuffer[j])))
{ {
@@ -396,8 +399,9 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
// Run the kernels // Run the kernels
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
size_t vectorSize = sizeValues[j] * sizeof(cl_float); size_t vectorSize = sizeof(cl_float) * sizeValues[j];
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; size_t localCount = (bufferSize + vectorSize - 1)
/ vectorSize; // bufferSize / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j]))) &gOutBuffer[j])))
{ {
@@ -447,6 +451,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
} }
vlog("\n"); vlog("\n");
exit: exit:
RestoreFPState(&oldMode); RestoreFPState(&oldMode);
// Release // Release
@@ -481,13 +486,13 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
Force64BitFPUPrecision(); Force64BitFPUPrecision();
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
{ {
return error; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
return error;
} }
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
@@ -504,6 +509,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
for (j = 0; j < bufferSize / sizeof(cl_double); j++) for (j = 0; j < bufferSize / sizeof(cl_double); j++)
p[j] = DoubleFromUInt32((uint32_t)i + j); p[j] = DoubleFromUInt32((uint32_t)i + j);
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
bufferSize, gIn, 0, NULL, NULL))) bufferSize, gIn, 0, NULL, NULL)))
{ {
@@ -529,8 +535,9 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
// Run the kernels // Run the kernels
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
size_t vectorSize = sizeValues[j] * sizeof(cl_double); size_t vectorSize = sizeof(cl_double) * sizeValues[j];
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; size_t localCount = (bufferSize + vectorSize - 1)
/ vectorSize; // bufferSize / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j]))) &gOutBuffer[j])))
{ {
@@ -616,6 +623,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
{ {
vlog("."); vlog(".");
} }
fflush(stdout); fflush(stdout);
} }
} }
@@ -698,7 +706,6 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
vlog("\n"); vlog("\n");
exit: exit:
RestoreFPState(&oldMode); RestoreFPState(&oldMode);
// Release // Release

View File

@@ -64,7 +64,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" float3 f0, f1;\n" " float3 f0;\n"
" float3 f1;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -92,7 +93,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
"}\n" "}\n"
}; };
const char **kern = c; const char **kern = c;
size_t kernSize = sizeof(c) / sizeof(c[0]); size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -110,7 +110,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode); relaxedMode);
} }
static int BuildKernelDouble(const char *name, int vectorSize, static int BuildKernelDouble(const char *name, int vectorSize,
cl_uint kernel_count, cl_kernel *k, cl_program *p, cl_uint kernel_count, cl_kernel *k, cl_program *p,
bool relaxedMode) bool relaxedMode)
@@ -153,7 +152,8 @@ static int BuildKernelDouble(const char *name, int vectorSize,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" double3 f0, f1;\n" " double3 f0;\n"
" double3 f1;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -190,7 +190,6 @@ static int BuildKernelDouble(const char *name, int vectorSize,
kernSize = sizeof(c3) / sizeof(c3[0]); kernSize = sizeof(c3) / sizeof(c3[0]);
} }
char testName[32]; char testName[32];
snprintf(testName, sizeof(testName) - 1, "math_kernel%s", snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
sizeNames[vectorSize]); sizeNames[vectorSize]);
@@ -228,7 +227,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
info->relaxedMode); info->relaxedMode);
} }
// A table of more difficult cases to get right // A table of more difficult cases to get right
static const float specialValuesFloat[] = { static const float specialValuesFloat[] = {
-NAN, -NAN,
@@ -379,6 +377,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_float)); test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode) if (gWimpyMode)
{ {
test_info.subBufferSize = gWimpyBufferSize test_info.subBufferSize = gWimpyBufferSize
@@ -460,8 +459,8 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
&region, &error); &region, &error);
if (error || NULL == test_info.tinfo[i].outBuf[j]) if (error || NULL == test_info.tinfo[i].outBuf[j])
{ {
vlog_error("Error: Unable to create sub-buffer of gInBuffer " vlog_error("Error: Unable to create sub-buffer of "
"for region {%zd, %zd}\n", "gInBuffer for region {%zd, %zd}\n",
region.origin, region.size); region.origin, region.size);
goto exit; goto exit;
} }
@@ -489,7 +488,6 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
goto exit; goto exit;
} }
// Run the kernels // Run the kernels
if (!gSkipCorrectnessTesting) if (!gSkipCorrectnessTesting)
{ {
@@ -506,8 +504,8 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
if (gMeasureTimes) if (gMeasureTimes)
{ {
// Init input arrays // Init input arrays
uint32_t *p = (uint32_t *)gIn; cl_uint *p = (cl_uint *)gIn;
uint32_t *p2 = (uint32_t *)gIn2; cl_uint *p2 = (cl_uint *)gIn2;
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++) for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
{ {
p[j] = genrand_int32(d); p[j] = genrand_int32(d);
@@ -520,6 +518,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
BUFFER_SIZE, gIn2, 0, NULL, NULL))) BUFFER_SIZE, gIn2, 0, NULL, NULL)))
{ {
@@ -531,8 +530,9 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
// Run the kernels // Run the kernels
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
size_t vectorSize = sizeValues[j] * sizeof(cl_float); size_t vectorSize = sizeof(cl_float) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(test_info.k[j][0], 0, if ((error = clSetKernelArg(test_info.k[j][0], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j]))) sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{ {
@@ -586,6 +586,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
f->name, sizeNames[j]); f->name, sizeNames[j]);
} }
} }
vlog("\n"); vlog("\n");
exit: exit:
@@ -631,8 +632,10 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
cl_uint j, k; cl_uint j, k;
cl_int error; cl_int error;
const char *name = job->f->name; const char *name = job->f->name;
cl_int *t, *r; cl_int *t = 0;
cl_float *s, *s2; cl_int *r = 0;
cl_float *s = 0;
cl_float *s2 = 0;
// start the map of the output arrays // start the map of the output arrays
cl_event e[VECTOR_SIZE_COUNT]; cl_event e[VECTOR_SIZE_COUNT];
@@ -657,6 +660,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
j = 0; j = 0;
int totalSpecialValueCount = int totalSpecialValueCount =
specialValuesFloatCount * specialValuesFloatCount; specialValuesFloatCount * specialValuesFloatCount;
int indx = (totalSpecialValueCount - 1) / buffer_elements; int indx = (totalSpecialValueCount - 1) / buffer_elements;
@@ -674,7 +678,8 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
{ {
fp[j] = specialValuesFloat[x]; fp[j] = specialValuesFloat[x];
fp2[j] = specialValuesFloat[y]; fp2[j] = specialValuesFloat[y];
if (++x >= specialValuesFloatCount) ++x;
if (x >= specialValuesFloatCount)
{ {
x = 0; x = 0;
y++; y++;
@@ -690,7 +695,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
p2[j] = genrand_int32(d); p2[j] = genrand_int32(d);
} }
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
buffer_size, p, 0, NULL, NULL))) buffer_size, p, 0, NULL, NULL)))
{ {
@@ -895,6 +899,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
} }
} }
} }
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
@@ -1044,7 +1049,6 @@ static const double specialValuesDouble[] = {
static size_t specialValuesDoubleCount = static size_t specialValuesDoubleCount =
sizeof(specialValuesDouble) / sizeof(specialValuesDouble[0]); sizeof(specialValuesDouble) / sizeof(specialValuesDouble[0]);
static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *p); static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *p);
int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
@@ -1061,6 +1065,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_double)); test_info.scale = getTestScale(sizeof(cl_double));
if (gWimpyMode) if (gWimpyMode)
{ {
test_info.subBufferSize = gWimpyBufferSize test_info.subBufferSize = gWimpyBufferSize
@@ -1136,12 +1141,9 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
/* Qualcomm fix: 9461 read-write flags must be compatible with
* parent buffer */
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
&region, &error); &region, &error);
/* Qualcomm fix: end */
if (error || NULL == test_info.tinfo[i].outBuf[j]) if (error || NULL == test_info.tinfo[i].outBuf[j])
{ {
vlog_error("Error: Unable to create sub-buffer of gInBuffer " vlog_error("Error: Unable to create sub-buffer of gInBuffer "
@@ -1161,7 +1163,6 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
test_info.tinfo[i].d = init_genrand(genrand_int32(d)); test_info.tinfo[i].d = init_genrand(genrand_int32(d));
} }
// Init the kernels // Init the kernels
{ {
BuildKernelInfo build_info = { BuildKernelInfo build_info = {
@@ -1174,6 +1175,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
goto exit; goto exit;
} }
// Run the kernels
if (!gSkipCorrectnessTesting) if (!gSkipCorrectnessTesting)
{ {
error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info); error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -1189,8 +1191,8 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
if (gMeasureTimes) if (gMeasureTimes)
{ {
// Init input arrays // Init input arrays
uint64_t *p = (uint64_t *)gIn; cl_ulong *p = (cl_ulong *)gIn;
uint64_t *p2 = (uint64_t *)gIn2; cl_ulong *p2 = (cl_ulong *)gIn2;
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++) for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
{ {
p[j] = p[j] =
@@ -1216,8 +1218,9 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
// Run the kernels // Run the kernels
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
size_t vectorSize = sizeValues[j] * sizeof(cl_double); size_t vectorSize = sizeof(cl_double) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(test_info.k[j][0], 0, if ((error = clSetKernelArg(test_info.k[j][0], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j]))) sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{ {
@@ -1319,8 +1322,10 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
cl_uint j, k; cl_uint j, k;
cl_int error; cl_int error;
const char *name = job->f->name; const char *name = job->f->name;
cl_long *t, *r; cl_long *t;
cl_double *s, *s2; cl_long *r;
cl_double *s;
cl_double *s2;
Force64BitFPUPrecision(); Force64BitFPUPrecision();
@@ -1378,7 +1383,6 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
((cl_ulong *)p2)[j] = genrand_int64(d); ((cl_ulong *)p2)[j] = genrand_int64(d);
} }
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
buffer_size, p, 0, NULL, NULL))) buffer_size, p, 0, NULL, NULL)))
{ {
@@ -1493,11 +1497,12 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
t = (cl_long *)r; t = (cl_long *)r;
for (j = 0; j < buffer_elements; j++) for (j = 0; j < buffer_elements; j++)
{ {
cl_long *q = (cl_long *)out[0]; cl_long *q = out[0];
// If we aren't getting the correctly rounded result // If we aren't getting the correctly rounded result
if (gMinVectorSizeIndex == 0 && t[j] != q[j]) if (gMinVectorSizeIndex == 0 && t[j] != q[j])
{ {
// If we aren't getting the correctly rounded result
if (ftz) if (ftz)
{ {
if (IsDoubleSubnormal(s[j])) if (IsDoubleSubnormal(s[j]))
@@ -1528,7 +1533,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
} }
} }
uint64_t err = t[j] - q[j]; cl_ulong err = t[j] - q[j];
if (q[j] > t[j]) err = q[j] - t[j]; if (q[j] > t[j]) err = q[j] - t[j];
vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld " vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld "
"vs. %lld (index: %d)\n", "vs. %lld (index: %d)\n",
@@ -1575,7 +1580,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
} }
} }
uint64_t err = -t[j] - q[j]; cl_ulong err = -t[j] - q[j];
if (q[j] > -t[j]) err = q[j] + t[j]; if (q[j] > -t[j]) err = q[j] + t[j];
vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, " vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, "
"%.13la}: *%lld vs. %lld (index: %d)\n", "%.13la}: *%lld vs. %lld (index: %d)\n",

View File

@@ -34,13 +34,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out, __global float", "* out, __global float",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
name, name,
"( in[i] );\n" "( in[i] );\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
"__kernel void math_kernel", "__kernel void math_kernel",
sizeNames[vectorSize], sizeNames[vectorSize],
@@ -115,7 +116,7 @@ static int BuildKernelDouble(const char *name, int vectorSize,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out, __global double", "* out, __global double",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
@@ -178,7 +179,6 @@ static int BuildKernelDouble(const char *name, int vectorSize,
kernSize = sizeof(c3) / sizeof(c3[0]); kernSize = sizeof(c3) / sizeof(c3[0]);
} }
char testName[32]; char testName[32];
snprintf(testName, sizeof(testName) - 1, "math_kernel%s", snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
sizeNames[vectorSize]); sizeNames[vectorSize]);
@@ -258,6 +258,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_float)); test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode) if (gWimpyMode)
{ {
test_info.subBufferSize = gWimpyBufferSize test_info.subBufferSize = gWimpyBufferSize
@@ -279,6 +280,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
test_info.f = f; test_info.f = f;
test_info.ftz = test_info.ftz =
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
// cl_kernels aren't thread safe, so we make one for each vector size for // cl_kernels aren't thread safe, so we make one for each vector size for
// every thread // every thread
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -328,8 +330,8 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
&region, &error); &region, &error);
if (error || NULL == test_info.tinfo[i].outBuf[j]) if (error || NULL == test_info.tinfo[i].outBuf[j])
{ {
vlog_error("Error: Unable to create sub-buffer of gOutBuffer " vlog_error("Error: Unable to create sub-buffer of "
"for region {%zd, %zd}\n", "gOutBuffer for region {%zd, %zd}\n",
region.origin, region.size); region.origin, region.size);
goto exit; goto exit;
} }
@@ -355,6 +357,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
goto exit; goto exit;
} }
// Run the kernels
if (!gSkipCorrectnessTesting) if (!gSkipCorrectnessTesting)
{ {
error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info); error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info);
@@ -501,7 +504,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
} }
} }
// Get that moving // Get that moving
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
@@ -569,7 +571,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
} }
} }
// Get that moving // Get that moving
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
@@ -594,6 +595,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
return error; return error;
} }
} }
// Wait for the last buffer // Wait for the last buffer
out[j] = (cl_int *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], out[j] = (cl_int *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
CL_TRUE, CL_MAP_READ, 0, buffer_size, CL_TRUE, CL_MAP_READ, 0, buffer_size,
@@ -711,12 +713,14 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
size_t i, j; size_t i, j;
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info)); memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_double)); test_info.scale = getTestScale(sizeof(cl_double));
if (gWimpyMode) if (gWimpyMode)
{ {
test_info.subBufferSize = gWimpyBufferSize test_info.subBufferSize = gWimpyBufferSize
@@ -782,12 +786,9 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
/* Qualcomm fix: 9461 read-write flags must be compatible with
* parent buffer */
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
&region, &error); &region, &error);
/* Qualcomm fix: end */
if (error || NULL == test_info.tinfo[i].outBuf[j]) if (error || NULL == test_info.tinfo[i].outBuf[j])
{ {
vlog_error("Error: Unable to create sub-buffer of gInBuffer " vlog_error("Error: Unable to create sub-buffer of gInBuffer "
@@ -817,6 +818,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
goto exit; goto exit;
} }
// Run the kernels
if (!gSkipCorrectnessTesting) if (!gSkipCorrectnessTesting)
{ {
error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info); error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -846,8 +848,9 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
// Run the kernels // Run the kernels
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
size_t vectorSize = sizeValues[j] * sizeof(cl_double); size_t vectorSize = sizeof(cl_double) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(test_info.k[j][0], 0, if ((error = clSetKernelArg(test_info.k[j][0], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j]))) sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{ {
@@ -900,6 +903,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
vlog("\n"); vlog("\n");
exit: exit:
// Release
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
@@ -936,9 +940,9 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = job->tinfo + thread_id;
dptr dfunc = job->f->dfunc; dptr dfunc = job->f->dfunc;
int ftz = job->ftz;
cl_uint j, k; cl_uint j, k;
cl_int error; cl_int error;
int ftz = job->ftz;
const char *name = job->f->name; const char *name = job->f->name;
Force64BitFPUPrecision(); Force64BitFPUPrecision();
@@ -1027,7 +1031,6 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
} }
} }
// Get that moving // Get that moving
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
@@ -1052,6 +1055,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
return error; return error;
} }
} }
// Wait for the last buffer // Wait for the last buffer
out[j] = (cl_long *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], out[j] = (cl_long *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
CL_TRUE, CL_MAP_READ, 0, buffer_size, CL_TRUE, CL_MAP_READ, 0, buffer_size,
@@ -1062,14 +1066,12 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
return error; return error;
} }
// Verify data // Verify data
cl_long *t = (cl_long *)r; cl_long *t = (cl_long *)r;
for (j = 0; j < buffer_elements; j++) for (j = 0; j < buffer_elements; j++)
{ {
cl_long *q = out[0]; cl_long *q = out[0];
// If we aren't getting the correctly rounded result // If we aren't getting the correctly rounded result
if (gMinVectorSizeIndex == 0 && t[j] != q[j]) if (gMinVectorSizeIndex == 0 && t[j] != q[j])
{ {

View File

@@ -44,6 +44,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
name, name,
"( in1[i], in2[i], in3[i] );\n" "( in1[i], in2[i], in3[i] );\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
"__kernel void math_kernel", "__kernel void math_kernel",
sizeNames[vectorSize], sizeNames[vectorSize],
@@ -66,7 +67,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" float3 f0, f1, f2;\n" " float3 f0;\n"
" float3 f1;\n"
" float3 f2;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -133,6 +136,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
name, name,
"( in1[i], in2[i], in3[i] );\n" "( in1[i], in2[i], in3[i] );\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel", "__kernel void math_kernel",
@@ -156,7 +160,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" double3 d0, d1, d2;\n" " double3 d0;\n"
" double3 d1;\n"
" double3 d2;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -247,37 +253,42 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode)
uint64_t step = getTestStep(sizeof(float), bufferSize); uint64_t step = getTestStep(sizeof(float), bufferSize);
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, {
f->nameInCode, relaxedMode }; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
if ((error = ThreadPool_Do(BuildKernel_FloatFn, f->nameInCode, relaxedMode };
gMaxVectorSizeIndex - gMinVectorSizeIndex, if ((error = ThreadPool_Do(BuildKernel_FloatFn,
&build_info))) gMaxVectorSizeIndex - gMinVectorSizeIndex,
return error; &build_info)))
return error;
}
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
{ {
// Init input array // Init input array
uint32_t *p = (uint32_t *)gIn; cl_uint *p = (cl_uint *)gIn;
uint32_t *p2 = (uint32_t *)gIn2; cl_uint *p2 = (cl_uint *)gIn2;
uint32_t *p3 = (uint32_t *)gIn3; cl_uint *p3 = (cl_uint *)gIn3;
for (j = 0; j < bufferSize / sizeof(float); j++) for (j = 0; j < bufferSize / sizeof(float); j++)
{ {
p[j] = genrand_int32(d); p[j] = genrand_int32(d);
p2[j] = genrand_int32(d); p2[j] = genrand_int32(d);
p3[j] = genrand_int32(d); p3[j] = genrand_int32(d);
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
bufferSize, gIn, 0, NULL, NULL))) bufferSize, gIn, 0, NULL, NULL)))
{ {
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
bufferSize, gIn2, 0, NULL, NULL))) bufferSize, gIn2, 0, NULL, NULL)))
{ {
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
bufferSize, gIn3, 0, NULL, NULL))) bufferSize, gIn3, 0, NULL, NULL)))
{ {
@@ -379,15 +390,15 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode)
if (gWimpyMode) if (gWimpyMode)
vlog("Wimp pass"); vlog("Wimp pass");
else else
vlog("pass"); vlog("passed");
} }
if (gMeasureTimes) if (gMeasureTimes)
{ {
// Init input array // Init input array
uint32_t *p = (uint32_t *)gIn; cl_uint *p = (cl_uint *)gIn;
uint32_t *p2 = (uint32_t *)gIn2; cl_uint *p2 = (cl_uint *)gIn2;
uint32_t *p3 = (uint32_t *)gIn3; cl_uint *p3 = (cl_uint *)gIn3;
for (j = 0; j < bufferSize / sizeof(float); j++) for (j = 0; j < bufferSize / sizeof(float); j++)
{ {
p[j] = genrand_int32(d); p[j] = genrand_int32(d);
@@ -508,18 +519,18 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
double maxErrorVal2 = 0.0f; double maxErrorVal2 = 0.0f;
double maxErrorVal3 = 0.0f; double maxErrorVal3 = 0.0f;
size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
uint64_t step = getTestStep(sizeof(double), bufferSize); uint64_t step = getTestStep(sizeof(double), bufferSize);
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
{ {
return error; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
return error;
} }
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
@@ -534,18 +545,21 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
p2[j] = DoubleFromUInt32(genrand_int32(d)); p2[j] = DoubleFromUInt32(genrand_int32(d));
p3[j] = DoubleFromUInt32(genrand_int32(d)); p3[j] = DoubleFromUInt32(genrand_int32(d));
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
bufferSize, gIn, 0, NULL, NULL))) bufferSize, gIn, 0, NULL, NULL)))
{ {
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
bufferSize, gIn2, 0, NULL, NULL))) bufferSize, gIn2, 0, NULL, NULL)))
{ {
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
bufferSize, gIn3, 0, NULL, NULL))) bufferSize, gIn3, 0, NULL, NULL)))
{ {
@@ -647,7 +661,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
if (gWimpyMode) if (gWimpyMode)
vlog("Wimp pass"); vlog("Wimp pass");
else else
vlog("pass"); vlog("passed");
} }
if (gMeasureTimes) if (gMeasureTimes)

View File

@@ -52,7 +52,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
const char *c3[] = { const char *c3[] = {
"__kernel void math_kernel", "__kernel void math_kernel",
sizeNames[vectorSize], sizeNames[vectorSize],
"( __global float* out, __global float* in, __global float* in2 , " "( __global float* out, __global float* in, __global float* in2, "
"__global float* in3)\n" "__global float* in3)\n"
"{\n" "{\n"
" size_t i = get_global_id(0);\n" " size_t i = get_global_id(0);\n"
@@ -71,7 +71,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" float3 f0, f1, f2;\n" " float3 f0;\n"
" float3 f1;\n"
" float3 f2;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -143,7 +145,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel", "__kernel void math_kernel",
sizeNames[vectorSize], sizeNames[vectorSize],
"( __global double* out, __global double* in, __global double* in2 , " "( __global double* out, __global double* in, __global double* in2, "
"__global double* in3)\n" "__global double* in3)\n"
"{\n" "{\n"
" size_t i = get_global_id(0);\n" " size_t i = get_global_id(0);\n"
@@ -162,7 +164,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
" size_t parity = i & 1; // Figure out how many elements are " " size_t parity = i & 1; // Figure out how many elements are "
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
"buffer size \n" "buffer size \n"
" double3 d0, d1, d2;\n" " double3 d0;\n"
" double3 d1;\n"
" double3 d2;\n"
" switch( parity )\n" " switch( parity )\n"
" {\n" " {\n"
" case 1:\n" " case 1:\n"
@@ -235,7 +239,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
info->programs + i, info->relaxedMode); info->programs + i, info->relaxedMode);
} }
// A table of more difficult cases to get right // A table of more difficult cases to get right
static const float specialValuesFloat[] = { static const float specialValuesFloat[] = {
-NAN, -NAN,
@@ -315,7 +318,7 @@ static const float specialValuesFloat[] = {
+0.0f +0.0f
}; };
static size_t specialValuesFloatCount = static const size_t specialValuesFloatCount =
sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
@@ -324,6 +327,9 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
uint64_t i; uint64_t i;
uint32_t j, k; uint32_t j, k;
int error; int error;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
cl_program programs[VECTOR_SIZE_COUNT]; cl_program programs[VECTOR_SIZE_COUNT];
cl_kernel kernels[VECTOR_SIZE_COUNT]; cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f; float maxError = 0.0f;
@@ -332,33 +338,34 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
float maxErrorVal2 = 0.0f; float maxErrorVal2 = 0.0f;
float maxErrorVal3 = 0.0f; float maxErrorVal3 = 0.0f;
size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
uint64_t step = getTestStep(sizeof(float), bufferSize); uint64_t step = getTestStep(sizeof(float), bufferSize);
int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;
cl_uchar overflow[BUFFER_SIZE / sizeof(float)]; cl_uchar overflow[BUFFER_SIZE / sizeof(float)];
float float_ulps; float float_ulps;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
if (gIsEmbedded) if (gIsEmbedded)
float_ulps = f->float_embedded_ulps; float_ulps = f->float_embedded_ulps;
else else
float_ulps = f->float_ulps; float_ulps = f->float_ulps;
int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, {
f->nameInCode, relaxedMode }; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
if ((error = ThreadPool_Do(BuildKernel_FloatFn, f->nameInCode, relaxedMode };
gMaxVectorSizeIndex - gMinVectorSizeIndex, if ((error = ThreadPool_Do(BuildKernel_FloatFn,
&build_info))) gMaxVectorSizeIndex - gMinVectorSizeIndex,
return error; &build_info)))
return error;
}
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
{ {
// Init input array // Init input array
uint32_t *p = (uint32_t *)gIn; cl_uint *p = (cl_uint *)gIn;
uint32_t *p2 = (uint32_t *)gIn2; cl_uint *p2 = (cl_uint *)gIn2;
uint32_t *p3 = (uint32_t *)gIn3; cl_uint *p3 = (cl_uint *)gIn3;
j = 0; j = 0;
if (i == 0) if (i == 0)
{ // test edge cases { // test edge cases
@@ -393,18 +400,21 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
p2[j] = genrand_int32(d); p2[j] = genrand_int32(d);
p3[j] = genrand_int32(d); p3[j] = genrand_int32(d);
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
bufferSize, gIn, 0, NULL, NULL))) bufferSize, gIn, 0, NULL, NULL)))
{ {
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
bufferSize, gIn2, 0, NULL, NULL))) bufferSize, gIn2, 0, NULL, NULL)))
{ {
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
bufferSize, gIn3, 0, NULL, NULL))) bufferSize, gIn3, 0, NULL, NULL)))
{ {
@@ -493,7 +503,6 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
(float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED); (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
} }
// Read the data back // Read the data back
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
@@ -963,9 +972,9 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
if (gMeasureTimes) if (gMeasureTimes)
{ {
// Init input array // Init input array
uint32_t *p = (uint32_t *)gIn; cl_uint *p = (cl_uint *)gIn;
uint32_t *p2 = (uint32_t *)gIn2; cl_uint *p2 = (cl_uint *)gIn2;
uint32_t *p3 = (uint32_t *)gIn3; cl_uint *p3 = (cl_uint *)gIn3;
for (j = 0; j < bufferSize / sizeof(float); j++) for (j = 0; j < bufferSize / sizeof(float); j++)
{ {
p[j] = genrand_int32(d); p[j] = genrand_int32(d);
@@ -1160,21 +1169,21 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
double maxErrorVal = 0.0f; double maxErrorVal = 0.0f;
double maxErrorVal2 = 0.0f; double maxErrorVal2 = 0.0f;
double maxErrorVal3 = 0.0f; double maxErrorVal3 = 0.0f;
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
uint64_t step = getTestStep(sizeof(double), bufferSize); uint64_t step = getTestStep(sizeof(double), bufferSize);
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
Force64BitFPUPrecision(); Force64BitFPUPrecision();
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
{ {
return error; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
return error;
} }
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
@@ -1213,18 +1222,21 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
p2[j] = DoubleFromUInt32(genrand_int32(d)); p2[j] = DoubleFromUInt32(genrand_int32(d));
p3[j] = DoubleFromUInt32(genrand_int32(d)); p3[j] = DoubleFromUInt32(genrand_int32(d));
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
bufferSize, gIn, 0, NULL, NULL))) bufferSize, gIn, 0, NULL, NULL)))
{ {
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
bufferSize, gIn2, 0, NULL, NULL))) bufferSize, gIn2, 0, NULL, NULL)))
{ {
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
return error; return error;
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
bufferSize, gIn3, 0, NULL, NULL))) bufferSize, gIn3, 0, NULL, NULL)))
{ {
@@ -1287,7 +1299,6 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
} }
} }
// Get that moving // Get that moving
if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); if ((error = clFlush(gQueue))) vlog("clFlush failed\n");

View File

@@ -37,13 +37,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out, __global float", "* out, __global float",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
name, name,
"( in[i] );\n" "( in[i] );\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
"__kernel void math_kernel", "__kernel void math_kernel",
sizeNames[vectorSize], sizeNames[vectorSize],
@@ -89,7 +90,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
"}\n" "}\n"
}; };
const char **kern = c; const char **kern = c;
size_t kernSize = sizeof(c) / sizeof(c[0]); size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -118,7 +118,7 @@ static int BuildKernelDouble(const char *name, int vectorSize,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out, __global double", "* out, __global double",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
@@ -181,7 +181,6 @@ static int BuildKernelDouble(const char *name, int vectorSize,
kernSize = sizeof(c3) / sizeof(c3[0]); kernSize = sizeof(c3) / sizeof(c3[0]);
} }
char testName[32]; char testName[32];
snprintf(testName, sizeof(testName) - 1, "math_kernel%s", snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
sizeNames[vectorSize]); sizeNames[vectorSize]);
@@ -249,7 +248,7 @@ typedef struct TestInfo
int isRangeLimited; // 1 if the function is only to be evaluated over a int isRangeLimited; // 1 if the function is only to be evaluated over a
// range // range
float half_sin_cos_tan_limit; float half_sin_cos_tan_limit;
bool relaxedMode; // True if test is to be run in relaxed mode, false bool relaxedMode; // True if test is running in relaxed mode, false
// otherwise. // otherwise.
} TestInfo; } TestInfo;
@@ -269,10 +268,10 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info)); memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = getTestScale(sizeof(cl_float)); test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode) if (gWimpyMode)
{ {
test_info.subBufferSize = gWimpyBufferSize test_info.subBufferSize = gWimpyBufferSize
@@ -345,8 +344,8 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
&region, &error); &region, &error);
if (error || NULL == test_info.tinfo[i].outBuf[j]) if (error || NULL == test_info.tinfo[i].outBuf[j])
{ {
vlog_error("Error: Unable to create sub-buffer of gInBuffer " vlog_error("Error: Unable to create sub-buffer of "
"for region {%zd, %zd}\n", "gInBuffer for region {%zd, %zd}\n",
region.origin, region.size); region.origin, region.size);
goto exit; goto exit;
} }
@@ -390,6 +389,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
goto exit; goto exit;
} }
// Run the kernels
if (!gSkipCorrectnessTesting || skipTestingRelaxed) if (!gSkipCorrectnessTesting || skipTestingRelaxed)
{ {
error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info); error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info);
@@ -443,8 +443,9 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
// Run the kernels // Run the kernels
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
size_t vectorSize = sizeValues[j] * sizeof(cl_float); size_t vectorSize = sizeof(cl_float) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(test_info.k[j][0], 0, if ((error = clSetKernelArg(test_info.k[j][0], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j]))) sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{ {
@@ -479,9 +480,9 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
} }
uint64_t endTime = GetTime(); uint64_t endTime = GetTime();
double current_time = SubtractTime(endTime, startTime); double time = SubtractTime(endTime, startTime);
sum += current_time; sum += time;
if (current_time < bestTime) bestTime = current_time; if (time < bestTime) bestTime = time;
} }
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT; if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
@@ -497,6 +498,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
vlog("\n"); vlog("\n");
exit: exit:
// Release
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
@@ -553,7 +555,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
cl_uint *out[VECTOR_SIZE_COUNT]; cl_uint *out[VECTOR_SIZE_COUNT];
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
out[j] = (uint32_t *)clEnqueueMapBuffer( out[j] = (cl_uint *)clEnqueueMapBuffer(
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
buffer_size, 0, NULL, e + j, &error); buffer_size, 0, NULL, e + j, &error);
if (error || NULL == out[j]) if (error || NULL == out[j])
@@ -627,7 +629,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
return error; return error;
} }
// run the kernel // Run the kernel
size_t vectorCount = size_t vectorCount =
(buffer_elements + sizeValues[j] - 1) / sizeValues[j]; (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -655,7 +657,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
} }
} }
// Get that moving // Get that moving
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
@@ -670,7 +671,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
// an in order queue. // an in order queue.
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
{ {
out[j] = (uint32_t *)clEnqueueMapBuffer( out[j] = (cl_uint *)clEnqueueMapBuffer(
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
buffer_size, 0, NULL, NULL, &error); buffer_size, 0, NULL, NULL, &error);
if (error || NULL == out[j]) if (error || NULL == out[j])
@@ -680,6 +681,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
return error; return error;
} }
} }
// Wait for the last buffer // Wait for the last buffer
out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
CL_TRUE, CL_MAP_READ, 0, CL_TRUE, CL_MAP_READ, 0,
@@ -1246,12 +1248,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
/* Qualcomm fix: 9461 read-write flags must be compatible with
* parent buffer */
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
&region, &error); &region, &error);
/* Qualcomm fix: end */
if (error || NULL == test_info.tinfo[i].outBuf[j]) if (error || NULL == test_info.tinfo[i].outBuf[j])
{ {
vlog_error("Error: Unable to create sub-buffer of gInBuffer " vlog_error("Error: Unable to create sub-buffer of gInBuffer "
@@ -1281,6 +1280,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
goto exit; goto exit;
} }
// Run the kernels
if (!gSkipCorrectnessTesting) if (!gSkipCorrectnessTesting)
{ {
error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info); error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -1334,8 +1334,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
// Run the kernels // Run the kernels
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{ {
size_t vectorSize = sizeValues[j] * sizeof(cl_double); size_t vectorSize = sizeof(cl_double) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(test_info.k[j][0], 0, if ((error = clSetKernelArg(test_info.k[j][0], 0,
sizeof(gOutBuffer[j]), &gOutBuffer[j]))) sizeof(gOutBuffer[j]), &gOutBuffer[j])))
{ {
@@ -1370,9 +1371,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
} }
uint64_t endTime = GetTime(); uint64_t endTime = GetTime();
double current_time = SubtractTime(endTime, startTime); double time = SubtractTime(endTime, startTime);
sum += current_time; sum += time;
if (current_time < bestTime) bestTime = current_time; if (time < bestTime) bestTime = time;
} }
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT; if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
@@ -1393,6 +1394,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
vlog("\n"); vlog("\n");
exit: exit:
// Release
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);

View File

@@ -36,7 +36,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out2, __global float", "* out2, __global float",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
@@ -93,6 +93,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
" }\n" " }\n"
"}\n" "}\n"
}; };
const char **kern = c; const char **kern = c;
size_t kernSize = sizeof(c) / sizeof(c[0]); size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -121,7 +122,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out2, __global double", "* out2, __global double",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
@@ -179,6 +180,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
" }\n" " }\n"
"}\n" "}\n"
}; };
const char **kern = c; const char **kern = c;
size_t kernSize = sizeof(c) / sizeof(c[0]); size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -242,17 +244,19 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
cl_uchar overflow[BUFFER_SIZE / sizeof(float)]; cl_uchar overflow[BUFFER_SIZE / sizeof(float)];
int isFract = 0 == strcmp("fract", f->nameInCode); int isFract = 0 == strcmp("fract", f->nameInCode);
int skipNanInf = isFract && !gInfNanSupport; int skipNanInf = isFract && !gInfNanSupport;
float float_ulps = getAllowedUlpError(f, relaxedMode);
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
float float_ulps = getAllowedUlpError(f, relaxedMode);
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, {
f->nameInCode, relaxedMode }; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
if ((error = ThreadPool_Do(BuildKernel_FloatFn, f->nameInCode, relaxedMode };
gMaxVectorSizeIndex - gMinVectorSizeIndex, if ((error = ThreadPool_Do(BuildKernel_FloatFn,
&build_info))) gMaxVectorSizeIndex - gMinVectorSizeIndex,
return error; &build_info)))
return error;
}
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
{ {
@@ -282,6 +286,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
} }
} }
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
bufferSize, gIn, 0, NULL, NULL))) bufferSize, gIn, 0, NULL, NULL)))
{ {
@@ -454,7 +459,6 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
if (relaxedMode || skipNanInf) if (relaxedMode || skipNanInf)
{ {
if (skipNanInf && overflow[j]) continue; if (skipNanInf && overflow[j]) continue;
// Note: no double rounding here. Reference functions // Note: no double rounding here. Reference functions
// calculate in single precision. // calculate in single precision.
if (IsFloatInfinity(correct) || IsFloatNaN(correct) if (IsFloatInfinity(correct) || IsFloatNaN(correct)
@@ -670,6 +674,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
uint32_t *p = (uint32_t *)gIn; uint32_t *p = (uint32_t *)gIn;
for (j = 0; j < bufferSize / sizeof(float); j++) for (j = 0; j < bufferSize / sizeof(float); j++)
p[j] = genrand_int32(d); p[j] = genrand_int32(d);
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
bufferSize, gIn, 0, NULL, NULL))) bufferSize, gIn, 0, NULL, NULL)))
{ {
@@ -706,7 +711,6 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
double bestTime = INFINITY; double bestTime = INFINITY;
for (k = 0; k < PERF_LOOP_COUNT; k++) for (k = 0; k < PERF_LOOP_COUNT; k++)
{ {
uint64_t startTime = GetTime(); uint64_t startTime = GetTime();
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, &localCount, NULL, 0, NULL,
@@ -775,13 +779,13 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
Force64BitFPUPrecision(); Force64BitFPUPrecision();
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
{ {
return error; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
return error;
} }
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
@@ -1103,7 +1107,6 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
double bestTime = INFINITY; double bestTime = INFINITY;
for (k = 0; k < PERF_LOOP_COUNT; k++) for (k = 0; k < PERF_LOOP_COUNT; k++)
{ {
uint64_t startTime = GetTime(); uint64_t startTime = GetTime();
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, &localCount, NULL, 0, NULL,

View File

@@ -37,13 +37,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out2, __global float", "* out2, __global float",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
name, name,
"( in[i], out2 + i );\n" "( in[i], out2 + i );\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
"__kernel void math_kernel", "__kernel void math_kernel",
sizeNames[vectorSize], sizeNames[vectorSize],
@@ -93,6 +94,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
" }\n" " }\n"
"}\n" "}\n"
}; };
const char **kern = c; const char **kern = c;
size_t kernSize = sizeof(c) / sizeof(c[0]); size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -121,13 +123,14 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out2, __global double", "* out2, __global double",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
name, name,
"( in[i], out2 + i );\n" "( in[i], out2 + i );\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel", "__kernel void math_kernel",
@@ -178,6 +181,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
" }\n" " }\n"
"}\n" "}\n"
}; };
const char **kern = c; const char **kern = c;
size_t kernSize = sizeof(c) / sizeof(c[0]); size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -240,13 +244,13 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
float maxErrorVal = 0.0f; float maxErrorVal = 0.0f;
float maxErrorVal2 = 0.0f; float maxErrorVal2 = 0.0f;
size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
float float_ulps;
uint64_t step = getTestStep(sizeof(float), bufferSize); uint64_t step = getTestStep(sizeof(float), bufferSize);
int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(float)) + 1); int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(float)) + 1);
cl_ulong maxiError; cl_ulong maxiError;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
float float_ulps;
if (gIsEmbedded) if (gIsEmbedded)
float_ulps = f->float_embedded_ulps; float_ulps = f->float_embedded_ulps;
else else
@@ -255,12 +259,14 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0; maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0;
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, {
f->nameInCode, relaxedMode }; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
if ((error = ThreadPool_Do(BuildKernel_FloatFn, f->nameInCode, relaxedMode };
gMaxVectorSizeIndex - gMinVectorSizeIndex, if ((error = ThreadPool_Do(BuildKernel_FloatFn,
&build_info))) gMaxVectorSizeIndex - gMinVectorSizeIndex,
return error; &build_info)))
return error;
}
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
{ {
@@ -600,22 +606,21 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
double maxErrorVal2 = 0.0f; double maxErrorVal2 = 0.0f;
cl_ulong maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0; cl_ulong maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0;
size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
uint64_t step = getTestStep(sizeof(double), bufferSize); int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(cl_double)) + 1);
int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(double)) + 1);
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
Force64BitFPUPrecision(); Force64BitFPUPrecision();
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
{ {
return error; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
return error;
} }
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
@@ -624,12 +629,12 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
double *p = (double *)gIn; double *p = (double *)gIn;
if (gWimpyMode) if (gWimpyMode)
{ {
for (j = 0; j < bufferSize / sizeof(double); j++) for (j = 0; j < bufferSize / sizeof(cl_double); j++)
p[j] = DoubleFromUInt32((uint32_t)i + j * scale); p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
} }
else else
{ {
for (j = 0; j < bufferSize / sizeof(double); j++) for (j = 0; j < bufferSize / sizeof(cl_double); j++)
p[j] = DoubleFromUInt32((uint32_t)i + j); p[j] = DoubleFromUInt32((uint32_t)i + j);
} }
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
@@ -928,7 +933,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
double clocksPerOp = bestTime * (double)gDeviceFrequency double clocksPerOp = bestTime * (double)gDeviceFrequency
* gComputeDevices * gSimdSize * 1e6 * gComputeDevices * gSimdSize * 1e6
/ (bufferSize / sizeof(double)); / (bufferSize / sizeof(double));
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sd%s", vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
f->name, sizeNames[j]); f->name, sizeNames[j]);
} }
for (; j < gMaxVectorSizeIndex; j++) vlog("\t -- "); for (; j < gMaxVectorSizeIndex; j++) vlog("\t -- ");

View File

@@ -33,13 +33,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out, __global uint", "* out, __global uint",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
name, name,
"( in[i] );\n" "( in[i] );\n"
"}\n" }; "}\n" };
const char *c3[] = { const char *c3[] = {
"__kernel void math_kernel", "__kernel void math_kernel",
sizeNames[vectorSize], sizeNames[vectorSize],
@@ -112,7 +113,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
sizeNames[vectorSize], sizeNames[vectorSize],
"* out, __global ulong", "* out, __global ulong",
sizeNames[vectorSize], sizeNames[vectorSize],
"* in)\n" "* in )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" out[i] = ", " out[i] = ",
@@ -120,51 +121,53 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
"( in[i] );\n" "( in[i] );\n"
"}\n" }; "}\n" };
const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", const char *c3[] = {
"__kernel void math_kernel", "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
sizeNames[vectorSize], "__kernel void math_kernel",
"( __global double* out, __global ulong* in)\n" sizeNames[vectorSize],
"{\n" "( __global double* out, __global ulong* in )\n"
" size_t i = get_global_id(0);\n" "{\n"
" if( i + 1 < get_global_size(0) )\n" " size_t i = get_global_id(0);\n"
" {\n" " if( i + 1 < get_global_size(0) )\n"
" ulong3 u0 = vload3( 0, in + 3 * i );\n" " {\n"
" double3 f0 = ", " ulong3 u0 = vload3( 0, in + 3 * i );\n"
name, " double3 f0 = ",
"( u0 );\n" name,
" vstore3( f0, 0, out + 3*i );\n" "( u0 );\n"
" }\n" " vstore3( f0, 0, out + 3*i );\n"
" else\n" " }\n"
" {\n" " else\n"
" size_t parity = i & 1; // Figure out how " " {\n"
"many elements are left over after BUFFER_SIZE % " " size_t parity = i & 1; // Figure out how many elements are "
"(3*sizeof(float)). Assume power of two buffer size \n" "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
" ulong3 u0;\n" "buffer size \n"
" switch( parity )\n" " ulong3 u0;\n"
" {\n" " switch( parity )\n"
" case 1:\n" " {\n"
" u0 = (ulong3)( in[3*i], " " case 1:\n"
"0xdeaddeaddeaddeadUL, 0xdeaddeaddeaddeadUL ); \n" " u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, "
" break;\n" "0xdeaddeaddeaddeadUL ); \n"
" case 0:\n" " break;\n"
" u0 = (ulong3)( in[3*i], in[3*i+1], " " case 0:\n"
"0xdeaddeaddeaddeadUL ); \n" " u0 = (ulong3)( in[3*i], in[3*i+1], "
" break;\n" "0xdeaddeaddeaddeadUL ); \n"
" }\n" " break;\n"
" double3 f0 = ", " }\n"
name, " double3 f0 = ",
"( u0 );\n" name,
" switch( parity )\n" "( u0 );\n"
" {\n" " switch( parity )\n"
" case 0:\n" " {\n"
" out[3*i+1] = f0.y; \n" " case 0:\n"
" // fall through\n" " out[3*i+1] = f0.y; \n"
" case 1:\n" " // fall through\n"
" out[3*i] = f0.x; \n" " case 1:\n"
" break;\n" " out[3*i] = f0.x; \n"
" }\n" " break;\n"
" }\n" " }\n"
"}\n" }; " }\n"
"}\n"
};
const char **kern = c; const char **kern = c;
size_t kernSize = sizeof(c) / sizeof(c[0]); size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -175,7 +178,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
kernSize = sizeof(c3) / sizeof(c3[0]); kernSize = sizeof(c3) / sizeof(c3[0]);
} }
char testName[32]; char testName[32];
snprintf(testName, sizeof(testName) - 1, "math_kernel%s", snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
sizeNames[vectorSize]); sizeNames[vectorSize]);
@@ -221,27 +223,28 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
float maxErrorVal = 0.0f; float maxErrorVal = 0.0f;
size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
uint64_t step = getTestStep(sizeof(float), bufferSize); uint64_t step = getTestStep(sizeof(float), bufferSize);
int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(double)) + 1); int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(double)) + 1);
int isRangeLimited = 0; int isRangeLimited = 0;
float float_ulps;
float half_sin_cos_tan_limit = 0; float half_sin_cos_tan_limit = 0;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
float float_ulps;
if (gIsEmbedded) if (gIsEmbedded)
float_ulps = f->float_embedded_ulps; float_ulps = f->float_embedded_ulps;
else else
float_ulps = f->float_ulps; float_ulps = f->float_ulps;
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, {
f->nameInCode, relaxedMode }; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
if ((error = ThreadPool_Do(BuildKernel_FloatFn, f->nameInCode, relaxedMode };
gMaxVectorSizeIndex - gMinVectorSizeIndex, if ((error = ThreadPool_Do(BuildKernel_FloatFn,
&build_info))) gMaxVectorSizeIndex - gMinVectorSizeIndex,
return error; &build_info)))
return error;
}
if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos")) if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
{ {
@@ -317,7 +320,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL))) &localCount, NULL, 0, NULL, NULL)))
{ {
vlog_error("FAILURE -- could not execute kernel\n"); vlog_error("FAILED -- could not execute kernel\n");
goto exit; goto exit;
} }
} }
@@ -419,7 +422,6 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
} }
} }
if (!gSkipCorrectnessTesting) if (!gSkipCorrectnessTesting)
{ {
if (gWimpyMode) if (gWimpyMode)
@@ -477,7 +479,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
&localCount, NULL, 0, NULL, &localCount, NULL, 0, NULL,
NULL))) NULL)))
{ {
vlog_error("FAILURE -- could not execute kernel\n"); vlog_error("FAILED -- could not execute kernel\n");
goto exit; goto exit;
} }
@@ -540,13 +542,13 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
Force64BitFPUPrecision(); Force64BitFPUPrecision();
// Init the kernels // Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
{ {
return error; BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
return error;
} }
for (i = 0; i < (1ULL << 32); i += step) for (i = 0; i < (1ULL << 32); i += step)
@@ -599,7 +601,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL))) &localCount, NULL, 0, NULL, NULL)))
{ {
vlog_error("FAILURE -- could not execute kernel\n"); vlog_error("FAILED -- could not execute kernel\n");
goto exit; goto exit;
} }
} }
@@ -627,7 +629,6 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
if (gSkipCorrectnessTesting) break; if (gSkipCorrectnessTesting) break;
// Verify data // Verify data
uint64_t *t = (uint64_t *)gOut_Ref; uint64_t *t = (uint64_t *)gOut_Ref;
for (j = 0; j < bufferSize / sizeof(cl_double); j++) for (j = 0; j < bufferSize / sizeof(cl_double); j++)
@@ -741,7 +742,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
&localCount, NULL, 0, NULL, &localCount, NULL, 0, NULL,
NULL))) NULL)))
{ {
vlog_error("FAILURE -- could not execute kernel\n"); vlog_error("FAILED -- could not execute kernel\n");
goto exit; goto exit;
} }