mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Reduce scope of variables (#1228)
Make variables local to loops, with appropriate types. These variables are not read after the loop without being reset first, so this patch doesn't change behaviour.

These variables should now be used for one purpose only, making it easier to reason about the code. This will make future refactoring easier.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
@@ -286,7 +286,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
@@ -321,7 +320,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -344,7 +343,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
@@ -371,7 +370,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -413,7 +412,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
@@ -437,12 +436,12 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -450,12 +449,12 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -477,7 +476,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
dptr func = job->f->dfunc;
|
||||
int ftz = job->ftz;
|
||||
MTdata d = tinfo->d;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
|
||||
@@ -492,7 +490,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_ulong *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -511,11 +509,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Init input array
|
||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
cl_uint idx = 0;
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
{ // test edge cases
|
||||
cl_double *fp = (cl_double *)p;
|
||||
cl_double *fp2 = (cl_double *)p2;
|
||||
@@ -524,10 +522,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
x = (job_id * buffer_elements) % specialValuesCount;
|
||||
y = (job_id * buffer_elements) / specialValuesCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
fp[j] = specialValues[x];
|
||||
fp2[j] = specialValues[y];
|
||||
fp[idx] = specialValues[x];
|
||||
fp2[idx] = specialValues[y];
|
||||
if (++x >= specialValuesCount)
|
||||
{
|
||||
x = 0;
|
||||
@@ -538,10 +536,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[j] = genrand_int64(d);
|
||||
p2[j] = genrand_int64(d);
|
||||
p[idx] = genrand_int64(d);
|
||||
p2[idx] = genrand_int64(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -558,7 +556,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -626,12 +624,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||
s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (cl_double)func.f_ff(s[j], s2[j]);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||
@@ -647,9 +645,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
t = (cl_ulong *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_ulong *q = out[k];
|
||||
|
||||
@@ -794,7 +792,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -276,7 +276,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
@@ -313,7 +312,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -336,7 +335,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
@@ -363,7 +362,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -405,7 +404,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
@@ -429,12 +428,12 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -442,12 +441,12 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -470,7 +469,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
bool relaxedMode = job->relaxedMode;
|
||||
float ulps = getAllowedUlpError(job->f, relaxedMode);
|
||||
MTdata d = tinfo->d;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
|
||||
const char *name = job->f->name;
|
||||
@@ -498,7 +496,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_uint *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -517,12 +515,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
|
||||
cl_uint idx = 0;
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
{ // test edge cases
|
||||
float *fp = (float *)p;
|
||||
float *fp2 = (float *)p2;
|
||||
@@ -531,10 +528,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
x = (job_id * buffer_elements) % specialValuesCount;
|
||||
y = (job_id * buffer_elements) / specialValuesCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
fp[j] = specialValues[x];
|
||||
fp2[j] = specialValues[y];
|
||||
fp[idx] = specialValues[x];
|
||||
fp2[idx] = specialValues[y];
|
||||
++x;
|
||||
if (x >= specialValuesCount)
|
||||
{
|
||||
@@ -546,10 +543,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
p[idx] = genrand_int32(d);
|
||||
p2[idx] = genrand_int32(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -566,7 +563,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -661,7 +658,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||
if (skipNanInf)
|
||||
{
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
r[j] = (float)ref_func(s[j], s2[j]);
|
||||
@@ -671,7 +668,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (float)ref_func(s[j], s2[j]);
|
||||
}
|
||||
|
||||
@@ -679,7 +676,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||
@@ -697,9 +694,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
// Verify data
|
||||
t = (cl_uint *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_uint *q = out[k];
|
||||
|
||||
@@ -956,7 +953,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
if (isFDim && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -288,7 +288,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
cl_int maxErrorVal2 = 0;
|
||||
@@ -319,7 +318,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -342,7 +341,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
@@ -372,7 +371,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -414,7 +413,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
@@ -438,12 +437,12 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -451,12 +450,12 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -478,7 +477,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
dptr func = job->f->dfunc;
|
||||
int ftz = job->ftz;
|
||||
MTdata d = tinfo->d;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
cl_ulong *t;
|
||||
@@ -491,7 +489,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_ulong *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -510,11 +508,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Init input array
|
||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||
cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
size_t idx = 0;
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
{ // test edge cases
|
||||
cl_double *fp = (cl_double *)p;
|
||||
cl_int *ip2 = (cl_int *)p2;
|
||||
@@ -523,10 +521,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
x = (job_id * buffer_elements) % specialValuesCount;
|
||||
y = (job_id * buffer_elements) / specialValuesCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
fp[j] = specialValues[x];
|
||||
ip2[j] = specialValuesInt[y];
|
||||
fp[idx] = specialValues[x];
|
||||
ip2[idx] = specialValuesInt[y];
|
||||
if (++x >= specialValuesCount)
|
||||
{
|
||||
x = 0;
|
||||
@@ -537,10 +535,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[j] = genrand_int32(d);
|
||||
p[idx] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[idx] = genrand_int32(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -557,7 +555,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -625,12 +623,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||
s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (cl_double)func.f_fi(s[j], s2[j]);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||
@@ -646,9 +644,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
t = (cl_ulong *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_ulong *q = out[k];
|
||||
|
||||
@@ -713,7 +711,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -280,7 +280,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
cl_int maxErrorVal2 = 0;
|
||||
@@ -312,7 +311,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -335,7 +334,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
@@ -365,7 +364,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -407,7 +406,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
@@ -431,12 +430,12 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -444,12 +443,12 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -471,7 +470,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
int ftz = job->ftz;
|
||||
float ulps = job->ulps;
|
||||
MTdata d = tinfo->d;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
cl_uint *t = 0;
|
||||
@@ -482,7 +480,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_uint *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -501,12 +499,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
|
||||
size_t idx = 0;
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
{ // test edge cases
|
||||
float *fp = (float *)p;
|
||||
cl_int *ip2 = (cl_int *)p2;
|
||||
@@ -515,10 +512,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
x = (job_id * buffer_elements) % specialValuesCount;
|
||||
y = (job_id * buffer_elements) / specialValuesCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
fp[j] = specialValues[x];
|
||||
ip2[j] = specialValuesInt[y];
|
||||
fp[idx] = specialValues[x];
|
||||
ip2[idx] = specialValuesInt[y];
|
||||
++x;
|
||||
if (x >= specialValuesCount)
|
||||
{
|
||||
@@ -530,10 +527,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
p[idx] = genrand_int32(d);
|
||||
p2[idx] = genrand_int32(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -550,7 +547,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -618,11 +615,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
r = (float *)gOut_Ref + thread_id * buffer_elements;
|
||||
s = (float *)gIn + thread_id * buffer_elements;
|
||||
s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++) r[j] = (float)func.f_fi(s[j], s2[j]);
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (float)func.f_fi(s[j], s2[j]);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||
@@ -638,9 +636,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
t = (cl_uint *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_uint *q = out[k];
|
||||
|
||||
@@ -707,7 +705,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -284,7 +284,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
@@ -315,7 +314,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -338,7 +337,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
@@ -365,7 +364,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -407,7 +406,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
@@ -431,12 +430,12 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -444,12 +443,12 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -472,7 +471,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
int ftz = job->ftz;
|
||||
bool relaxedMode = job->relaxedMode;
|
||||
MTdata d = tinfo->d;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
cl_ulong *t;
|
||||
@@ -485,7 +483,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_ulong *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -504,11 +502,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Init input array
|
||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
cl_uint idx = 0;
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
{ // test edge cases
|
||||
cl_double *fp = (cl_double *)p;
|
||||
cl_double *fp2 = (cl_double *)p2;
|
||||
@@ -517,10 +515,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
x = (job_id * buffer_elements) % specialValuesCount;
|
||||
y = (job_id * buffer_elements) / specialValuesCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
fp[j] = specialValues[x];
|
||||
fp2[j] = specialValues[y];
|
||||
fp[idx] = specialValues[x];
|
||||
fp2[idx] = specialValues[y];
|
||||
if (++x >= specialValuesCount)
|
||||
{
|
||||
x = 0;
|
||||
@@ -531,10 +529,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[j] = genrand_int64(d);
|
||||
p2[j] = genrand_int64(d);
|
||||
p[idx] = genrand_int64(d);
|
||||
p2[idx] = genrand_int64(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -551,7 +549,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -619,12 +617,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||
s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (cl_double)func.f_ff(s[j], s2[j]);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||
@@ -640,9 +638,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
t = (cl_ulong *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_ulong *q = out[k];
|
||||
|
||||
@@ -763,7 +761,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -274,7 +274,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
@@ -307,7 +306,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -330,7 +329,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
@@ -357,7 +356,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -399,7 +398,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
@@ -423,12 +422,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -436,12 +435,12 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -464,7 +463,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
bool relaxedMode = job->relaxedMode;
|
||||
float ulps = getAllowedUlpError(job->f, relaxedMode);
|
||||
MTdata d = tinfo->d;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
|
||||
const char *name = job->f->name;
|
||||
@@ -482,7 +480,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_uint *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -501,12 +499,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
|
||||
cl_uint idx = 0;
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
{
|
||||
// Insert special values
|
||||
uint32_t x, y;
|
||||
@@ -514,10 +511,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
x = (job_id * buffer_elements) % specialValuesCount;
|
||||
y = (job_id * buffer_elements) / specialValuesCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[j] = ((cl_uint *)specialValues)[x];
|
||||
p2[j] = ((cl_uint *)specialValues)[y];
|
||||
p[idx] = ((cl_uint *)specialValues)[x];
|
||||
p2[idx] = ((cl_uint *)specialValues)[y];
|
||||
++x;
|
||||
if (x >= specialValuesCount)
|
||||
{
|
||||
@@ -527,28 +524,28 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
if (relaxedMode && strcmp(name, "divide") == 0)
|
||||
{
|
||||
cl_uint pj = p[j] & 0x7fffffff;
|
||||
cl_uint p2j = p2[j] & 0x7fffffff;
|
||||
cl_uint pj = p[idx] & 0x7fffffff;
|
||||
cl_uint p2j = p2[idx] & 0x7fffffff;
|
||||
// Replace values outside [2^-62, 2^62] with QNaN
|
||||
if (pj < 0x20800000 || pj > 0x5e800000) p[j] = 0x7fc00000;
|
||||
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[j] = 0x7fc00000;
|
||||
if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
|
||||
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
p[idx] = genrand_int32(d);
|
||||
p2[idx] = genrand_int32(d);
|
||||
|
||||
if (relaxedMode && strcmp(name, "divide") == 0)
|
||||
{
|
||||
cl_uint pj = p[j] & 0x7fffffff;
|
||||
cl_uint p2j = p2[j] & 0x7fffffff;
|
||||
cl_uint pj = p[idx] & 0x7fffffff;
|
||||
cl_uint p2j = p2[idx] & 0x7fffffff;
|
||||
// Replace values outside [2^-62, 2^62] with QNaN
|
||||
if (pj < 0x20800000 || pj > 0x5e800000) p[j] = 0x7fc00000;
|
||||
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[j] = 0x7fc00000;
|
||||
if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
|
||||
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -566,7 +563,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -649,12 +646,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||
if (gInfNanSupport)
|
||||
{
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (float)func.f_ff(s[j], s2[j]);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
r[j] = (float)func.f_ff(s[j], s2[j]);
|
||||
@@ -669,7 +666,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||
@@ -685,9 +682,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
t = (cl_uint *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_uint *q = out[k];
|
||||
|
||||
@@ -892,7 +889,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -154,13 +154,12 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
double *r = cri->r + off;
|
||||
int *i = cri->i + off;
|
||||
long double (*f)(long double, long double, int *) = cri->f_ffpI;
|
||||
cl_uint j;
|
||||
|
||||
if (off + count > lim) count = lim - off;
|
||||
|
||||
Force64BitFPUPrecision();
|
||||
|
||||
for (j = 0; j < count; ++j)
|
||||
for (cl_uint j = 0; j < count; ++j)
|
||||
r[j] = (double)f((long double)x[j], (long double)y[j], i + j);
|
||||
|
||||
return CL_SUCCESS;
|
||||
@@ -168,8 +167,6 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
|
||||
int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
cl_program programs[VECTOR_SIZE_COUNT];
|
||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||
@@ -198,12 +195,12 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
double *p2 = (double *)gIn2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
{
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
@@ -224,7 +221,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -249,7 +246,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
@@ -311,12 +308,12 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
double *r = (double *)gOut_Ref;
|
||||
int *r2 = (int *)gOut_Ref2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
r[j] = (double)f->dfunc.f_ffpI(s[j], s2[j], r2 + j);
|
||||
}
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -339,9 +336,9 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
// Verify data
|
||||
uint64_t *t = (uint64_t *)gOut_Ref;
|
||||
int32_t *t2 = (int32_t *)gOut_Ref2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint64_t *q = (uint64_t *)gOut[k];
|
||||
int32_t *q2 = (int32_t *)gOut2[k];
|
||||
@@ -572,7 +569,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -152,11 +152,10 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
float *r = cri->r + off;
|
||||
int *i = cri->i + off;
|
||||
double (*f)(double, double, int *) = cri->f_ffpI;
|
||||
cl_uint j;
|
||||
|
||||
if (off + count > lim) count = lim - off;
|
||||
|
||||
for (j = 0; j < count; ++j)
|
||||
for (cl_uint j = 0; j < count; ++j)
|
||||
r[j] = (float)f((double)x[j], (double)y[j], i + j);
|
||||
|
||||
return CL_SUCCESS;
|
||||
@@ -164,8 +163,6 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
|
||||
int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
@@ -199,12 +196,12 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn;
|
||||
cl_uint *p2 = (cl_uint *)gIn2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
@@ -225,7 +222,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -250,7 +247,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
@@ -312,12 +309,12 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
float *r = (float *)gOut_Ref;
|
||||
int *r2 = (int *)gOut_Ref2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
r[j] = (float)f->func.f_ffpI(s[j], s2[j], r2 + j);
|
||||
}
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -340,9 +337,9 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
// Verify data
|
||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||
int32_t *t2 = (int32_t *)gOut_Ref2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||
int32_t *q2 = (int32_t *)gOut2[k];
|
||||
@@ -557,7 +554,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -119,8 +119,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
cl_program programs[VECTOR_SIZE_COUNT];
|
||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||
@@ -148,18 +146,18 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
if (gWimpyMode)
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
p[j] = DoubleFromUInt32((uint32_t)i + j);
|
||||
}
|
||||
|
||||
@@ -171,7 +169,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -186,7 +184,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
@@ -219,11 +217,11 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
// Calculate the correctly rounded reference result
|
||||
int *r = (int *)gOut_Ref;
|
||||
double *s = (double *)gIn;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
r[j] = f->dfunc.i_f(s[j]);
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -238,9 +236,9 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// Verify data
|
||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||
// If we aren't getting the correctly rounded result
|
||||
@@ -294,7 +292,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
exit:
|
||||
RestoreFPState(&oldMode);
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -117,8 +117,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
cl_program programs[VECTOR_SIZE_COUNT];
|
||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||
@@ -145,18 +143,18 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn;
|
||||
if (gWimpyMode)
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
p[j] = (cl_uint)i + j * scale;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
p[j] = (uint32_t)i + j;
|
||||
}
|
||||
|
||||
@@ -168,7 +166,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -183,7 +181,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
@@ -216,11 +214,11 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
// Calculate the correctly rounded reference result
|
||||
int *r = (int *)gOut_Ref;
|
||||
float *s = (float *)gIn;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
r[j] = f->func.i_f(s[j]);
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -235,9 +233,9 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// Verify data
|
||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||
// If we aren't getting the correctly rounded result
|
||||
@@ -290,7 +288,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
exit:
|
||||
RestoreFPState(&oldMode);
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -273,7 +273,6 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
@@ -300,7 +299,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -323,7 +322,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (size_t i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
@@ -350,7 +349,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -403,12 +402,12 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -416,12 +415,12 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -442,7 +441,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
dptr dfunc = job->f->dfunc;
|
||||
int ftz = job->ftz;
|
||||
MTdata d = tinfo->d;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
cl_long *t;
|
||||
@@ -455,7 +453,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_long *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_long *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -474,21 +472,21 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Init input array
|
||||
double *p = (double *)gIn + thread_id * buffer_elements;
|
||||
double *p2 = (double *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
cl_uint idx = 0;
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
{ // test edge cases
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesCount;
|
||||
y = (job_id * buffer_elements) / specialValuesCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[j] = specialValues[x];
|
||||
p2[j] = specialValues[y];
|
||||
p[idx] = specialValues[x];
|
||||
p2[idx] = specialValues[y];
|
||||
if (++x >= specialValuesCount)
|
||||
{
|
||||
x = 0;
|
||||
@@ -499,10 +497,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
((cl_ulong *)p)[j] = genrand_int64(d);
|
||||
((cl_ulong *)p2)[j] = genrand_int64(d);
|
||||
((cl_ulong *)p)[idx] = genrand_int64(d);
|
||||
((cl_ulong *)p2)[idx] = genrand_int64(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -519,7 +517,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -587,11 +585,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
|
||||
s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++) r[j] = dfunc.i_ff(s[j], s2[j]);
|
||||
for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_ff(s[j], s2[j]);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_long *)clEnqueueMapBuffer(
|
||||
@@ -607,7 +605,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
t = (cl_long *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
cl_long *q = out[0];
|
||||
|
||||
@@ -656,7 +654,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
|
||||
for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
q = (cl_long *)out[k];
|
||||
// If we aren't getting the correctly rounded result
|
||||
@@ -704,7 +702,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -263,7 +263,6 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
@@ -291,7 +290,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -314,7 +313,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
@@ -341,7 +340,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -394,12 +393,12 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -407,12 +406,12 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
free_mtdata(test_info.tinfo[i].d);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -433,7 +432,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
fptr func = job->f->func;
|
||||
int ftz = job->ftz;
|
||||
MTdata d = tinfo->d;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
cl_int *t = 0;
|
||||
@@ -444,7 +442,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_int *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_int *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -463,12 +461,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
cl_uint idx = 0;
|
||||
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
{ // test edge cases
|
||||
float *fp = (float *)p;
|
||||
float *fp2 = (float *)p2;
|
||||
@@ -477,10 +475,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
x = (job_id * buffer_elements) % specialValuesCount;
|
||||
y = (job_id * buffer_elements) / specialValuesCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
fp[j] = specialValues[x];
|
||||
fp2[j] = specialValues[y];
|
||||
fp[idx] = specialValues[x];
|
||||
fp2[idx] = specialValues[y];
|
||||
++x;
|
||||
if (x >= specialValuesCount)
|
||||
{
|
||||
@@ -492,10 +490,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
p[idx] = genrand_int32(d);
|
||||
p2[idx] = genrand_int32(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -512,7 +510,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -580,11 +578,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
|
||||
s = (float *)gIn + thread_id * buffer_elements;
|
||||
s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]);
|
||||
for (size_t j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_int *)clEnqueueMapBuffer(
|
||||
@@ -600,7 +598,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
t = (cl_int *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
cl_int *q = out[0];
|
||||
|
||||
@@ -646,7 +644,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
q = out[k];
|
||||
// If we aren't getting the correctly rounded result
|
||||
@@ -693,7 +691,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -151,7 +151,6 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||
|
||||
@@ -178,7 +177,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -201,7 +200,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
@@ -218,7 +217,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -269,12 +268,12 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -282,10 +281,10 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -306,7 +305,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
ThreadInfo *tinfo = job->tinfo + thread_id;
|
||||
dptr dfunc = job->f->dfunc;
|
||||
int ftz = job->ftz;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
|
||||
@@ -315,7 +313,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_long *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_long *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -333,7 +331,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Write the new values to the input array
|
||||
cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
p[j] = DoubleFromUInt32(base + j * scale);
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -343,7 +341,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -404,11 +402,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Calculate the correctly rounded reference result
|
||||
cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
|
||||
cl_double *s = (cl_double *)p;
|
||||
for (j = 0; j < buffer_elements; j++) r[j] = dfunc.i_f(s[j]);
|
||||
for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_f(s[j]);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_long *)clEnqueueMapBuffer(
|
||||
@@ -424,7 +422,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
cl_long *t = (cl_long *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
cl_long *q = out[0];
|
||||
|
||||
@@ -450,7 +448,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
|
||||
for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
q = out[k];
|
||||
// If we aren't getting the correctly rounded result
|
||||
@@ -476,7 +474,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -150,7 +150,6 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
|
||||
@@ -178,7 +177,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -201,7 +200,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
@@ -218,7 +217,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -269,12 +268,12 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -282,10 +281,10 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -306,7 +305,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
ThreadInfo *tinfo = job->tinfo + thread_id;
|
||||
fptr func = job->f->func;
|
||||
int ftz = job->ftz;
|
||||
cl_uint j, k;
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_int ret = CL_SUCCESS;
|
||||
const char *name = job->f->name;
|
||||
@@ -319,7 +317,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_int *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_int *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -337,7 +335,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
|
||||
for (size_t j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
buffer_size, p, 0, NULL, NULL)))
|
||||
@@ -346,7 +344,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -407,11 +405,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Calculate the correctly rounded reference result
|
||||
cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
|
||||
float *s = (float *)p;
|
||||
for (j = 0; j < buffer_elements; j++) r[j] = ref_func(s[j]);
|
||||
for (size_t j = 0; j < buffer_elements; j++) r[j] = ref_func(s[j]);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_int *)clEnqueueMapBuffer(
|
||||
@@ -427,9 +425,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
cl_int *t = (cl_int *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_int *q = out[0];
|
||||
|
||||
@@ -456,7 +454,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
|
||||
for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
|
||||
k++)
|
||||
{
|
||||
q = out[k];
|
||||
// If we aren't getting the correctly rounded result
|
||||
@@ -486,7 +485,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
exit:
|
||||
ret = error;
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -132,8 +132,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
cl_program programs[VECTOR_SIZE_COUNT];
|
||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||
@@ -155,13 +153,13 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
double *p2 = (double *)gIn2;
|
||||
double *p3 = (double *)gIn3;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
{
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
@@ -190,7 +188,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -205,7 +203,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
@@ -252,11 +250,11 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
double *s = (double *)gIn;
|
||||
double *s2 = (double *)gIn2;
|
||||
double *s3 = (double *)gIn3;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -293,7 +291,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -130,8 +130,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
@@ -154,13 +152,13 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn;
|
||||
cl_uint *p2 = (cl_uint *)gIn2;
|
||||
cl_uint *p3 = (cl_uint *)gIn3;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
@@ -189,7 +187,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -204,7 +202,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
@@ -251,11 +249,11 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
float *s = (float *)gIn;
|
||||
float *s2 = (float *)gIn2;
|
||||
float *s3 = (float *)gIn3;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
r[j] = (float)f->func.f_fff(s[j], s2[j], s3[j]);
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -292,7 +290,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -208,8 +208,6 @@ static const size_t specialValuesCount =
|
||||
int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||
bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
cl_program programs[VECTOR_SIZE_COUNT];
|
||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||
@@ -234,22 +232,23 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
double *p2 = (double *)gIn2;
|
||||
double *p3 = (double *)gIn3;
|
||||
j = 0;
|
||||
size_t idx = 0;
|
||||
|
||||
if (i == 0)
|
||||
{ // test edge cases
|
||||
uint32_t x, y, z;
|
||||
x = y = z = 0;
|
||||
for (; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (; idx < BUFFER_SIZE / sizeof(double); idx++)
|
||||
{
|
||||
p[j] = specialValues[x];
|
||||
p2[j] = specialValues[y];
|
||||
p3[j] = specialValues[z];
|
||||
p[idx] = specialValues[x];
|
||||
p2[idx] = specialValues[y];
|
||||
p3[idx] = specialValues[z];
|
||||
if (++x >= specialValuesCount)
|
||||
{
|
||||
x = 0;
|
||||
@@ -260,15 +259,15 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (j == BUFFER_SIZE / sizeof(double))
|
||||
if (idx == BUFFER_SIZE / sizeof(double))
|
||||
vlog_error("Test Error: not all special cases tested!\n");
|
||||
}
|
||||
|
||||
for (; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (; idx < BUFFER_SIZE / sizeof(double); idx++)
|
||||
{
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p3[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p[idx] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[idx] = DoubleFromUInt32(genrand_int32(d));
|
||||
p3[idx] = DoubleFromUInt32(genrand_int32(d));
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
@@ -293,7 +292,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -308,7 +307,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
@@ -355,11 +354,11 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||
double *s = (double *)gIn;
|
||||
double *s2 = (double *)gIn2;
|
||||
double *s3 = (double *)gIn3;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -374,9 +373,9 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||
|
||||
// Verify data
|
||||
uint64_t *t = (uint64_t *)gOut_Ref;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint64_t *q = (uint64_t *)(gOut[k]);
|
||||
|
||||
@@ -731,7 +730,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -215,8 +215,6 @@ static const size_t specialValuesCount =
|
||||
|
||||
int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||
@@ -250,13 +248,14 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn;
|
||||
cl_uint *p2 = (cl_uint *)gIn2;
|
||||
cl_uint *p3 = (cl_uint *)gIn3;
|
||||
j = 0;
|
||||
size_t idx = 0;
|
||||
|
||||
if (i == 0)
|
||||
{ // test edge cases
|
||||
float *fp = (float *)gIn;
|
||||
@@ -264,11 +263,11 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
float *fp3 = (float *)gIn3;
|
||||
uint32_t x, y, z;
|
||||
x = y = z = 0;
|
||||
for (; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (; idx < BUFFER_SIZE / sizeof(float); idx++)
|
||||
{
|
||||
fp[j] = specialValues[x];
|
||||
fp2[j] = specialValues[y];
|
||||
fp3[j] = specialValues[z];
|
||||
fp[idx] = specialValues[x];
|
||||
fp2[idx] = specialValues[y];
|
||||
fp3[idx] = specialValues[z];
|
||||
|
||||
if (++x >= specialValuesCount)
|
||||
{
|
||||
@@ -280,15 +279,15 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (j == BUFFER_SIZE / sizeof(float))
|
||||
if (idx == BUFFER_SIZE / sizeof(float))
|
||||
vlog_error("Test Error: not all special cases tested!\n");
|
||||
}
|
||||
|
||||
for (; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (; idx < BUFFER_SIZE / sizeof(float); idx++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
p3[j] = genrand_int32(d);
|
||||
p[idx] = genrand_int32(d);
|
||||
p2[idx] = genrand_int32(d);
|
||||
p3[idx] = genrand_int32(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
@@ -313,7 +312,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -328,7 +327,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
@@ -377,7 +376,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
float *s3 = (float *)gIn3;
|
||||
if (skipNanInf)
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
r[j] =
|
||||
@@ -388,13 +387,13 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
r[j] =
|
||||
(float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
|
||||
}
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -409,9 +408,9 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// Verify data
|
||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||
|
||||
@@ -866,7 +865,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -159,7 +159,6 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
|
||||
@@ -189,7 +188,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -212,7 +211,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_double),
|
||||
@@ -229,7 +228,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -269,7 +268,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
@@ -292,12 +291,12 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -305,10 +304,10 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -329,7 +328,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
ThreadInfo *tinfo = job->tinfo + thread_id;
|
||||
float ulps = job->ulps;
|
||||
dptr func = job->f->dfunc;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
int ftz = job->ftz;
|
||||
|
||||
@@ -338,7 +336,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_ulong *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -356,7 +354,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Write the new values to the input array
|
||||
cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
p[j] = DoubleFromUInt32(base + j * scale);
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -366,7 +364,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -428,11 +426,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Calculate the correctly rounded reference result
|
||||
cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||
cl_double *s = (cl_double *)p;
|
||||
for (j = 0; j < buffer_elements; j++) r[j] = (cl_double)func.f_f(s[j]);
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (cl_double)func.f_f(s[j]);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||
@@ -448,9 +447,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
cl_ulong *t = (cl_ulong *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_ulong *q = out[k];
|
||||
|
||||
@@ -516,7 +515,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -157,7 +157,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
|
||||
@@ -189,7 +188,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
test_info.relaxedMode = relaxedMode;
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||
// every thread
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||
@@ -212,7 +211,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
memset(test_info.tinfo, 0,
|
||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = {
|
||||
i * test_info.subBufferSize * sizeof(cl_float),
|
||||
@@ -229,7 +228,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
@@ -287,7 +286,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
@@ -316,12 +315,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
if (test_info.k[i])
|
||||
{
|
||||
for (j = 0; j < test_info.threadCount; j++)
|
||||
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
free(test_info.k[i]);
|
||||
@@ -329,10 +328,10 @@ exit:
|
||||
}
|
||||
if (test_info.tinfo)
|
||||
{
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
@@ -360,7 +359,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
func = job->f->rfunc;
|
||||
}
|
||||
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
|
||||
int isRangeLimited = job->isRangeLimited;
|
||||
@@ -370,7 +368,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_uint *out[VECTOR_SIZE_COUNT];
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
@@ -388,7 +386,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Write the new values to the input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
p[j] = base + j * scale;
|
||||
if (relaxedMode)
|
||||
@@ -421,7 +419,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
@@ -482,11 +480,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Calculate the correctly rounded reference result
|
||||
float *r = (float *)gOut_Ref + thread_id * buffer_elements;
|
||||
float *s = (float *)p;
|
||||
for (j = 0; j < buffer_elements; j++) r[j] = (float)func.f_f(s[j]);
|
||||
for (size_t j = 0; j < buffer_elements; j++) r[j] = (float)func.f_f(s[j]);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||
@@ -502,9 +500,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
// Verify data
|
||||
uint32_t *t = (uint32_t *)r;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint32_t *q = out[k];
|
||||
|
||||
@@ -695,7 +693,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
|
||||
@@ -126,8 +126,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
cl_program programs[VECTOR_SIZE_COUNT];
|
||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||
@@ -154,18 +152,18 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
if (gWimpyMode)
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
p[j] = DoubleFromUInt32((uint32_t)i + j);
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
@@ -176,7 +174,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -201,7 +199,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||
@@ -240,7 +238,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
double *r = (double *)gOut_Ref;
|
||||
double *r2 = (double *)gOut_Ref2;
|
||||
double *s = (double *)gIn;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
{
|
||||
long double dd;
|
||||
r[j] = (double)f->dfunc.f_fpf(s[j], &dd);
|
||||
@@ -248,7 +246,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -271,9 +269,9 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
// Verify data
|
||||
uint64_t *t = (uint64_t *)gOut_Ref;
|
||||
uint64_t *t2 = (uint64_t *)gOut_Ref2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint64_t *q = (uint64_t *)(gOut[k]);
|
||||
uint64_t *q2 = (uint64_t *)(gOut2[k]);
|
||||
@@ -438,7 +436,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -124,8 +124,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
uint32_t l;
|
||||
int error;
|
||||
char const *testing_mode;
|
||||
@@ -155,13 +153,13 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
uint32_t *p = (uint32_t *)gIn;
|
||||
if (gWimpyMode)
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
p[j] = (uint32_t)i + j * scale;
|
||||
if (relaxedMode && strcmp(f->name, "sincos") == 0)
|
||||
@@ -173,7 +171,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
p[j] = (uint32_t)i + j;
|
||||
if (relaxedMode && strcmp(f->name, "sincos") == 0)
|
||||
@@ -192,7 +190,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -217,7 +215,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||
@@ -272,7 +270,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
if (skipNanInf)
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
double dd;
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
@@ -289,7 +287,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
double dd;
|
||||
if (relaxedMode)
|
||||
@@ -304,7 +302,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
if (isFract && ftz) RestoreFPState(&oldMode);
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -331,9 +329,9 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
// Verify data
|
||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||
uint32_t *t2 = (uint32_t *)gOut_Ref2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint32_t *q = (uint32_t *)gOut[k];
|
||||
uint32_t *q2 = (uint32_t *)gOut2[k];
|
||||
@@ -572,7 +570,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -133,8 +133,6 @@ static cl_ulong abs_cl_long(cl_long i)
|
||||
|
||||
int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
cl_program programs[VECTOR_SIZE_COUNT];
|
||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||
@@ -162,18 +160,18 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
if (gWimpyMode)
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
p[j] = DoubleFromUInt32((uint32_t)i + j);
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
@@ -184,7 +182,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -209,7 +207,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||
@@ -248,11 +246,11 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
double *r = (double *)gOut_Ref;
|
||||
int *r2 = (int *)gOut_Ref2;
|
||||
double *s = (double *)gIn;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
r[j] = (double)f->dfunc.f_fpI(s[j], r2 + j);
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -275,9 +273,9 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
// Verify data
|
||||
uint64_t *t = (uint64_t *)gOut_Ref;
|
||||
int32_t *t2 = (int32_t *)gOut_Ref2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint64_t *q = (uint64_t *)(gOut[k]);
|
||||
int32_t *q2 = (int32_t *)(gOut2[k]);
|
||||
@@ -409,7 +407,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -131,8 +131,6 @@ static cl_ulong abs_cl_long(cl_long i)
|
||||
|
||||
int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
cl_program programs[VECTOR_SIZE_COUNT];
|
||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||
@@ -165,18 +163,18 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
uint32_t *p = (uint32_t *)gIn;
|
||||
if (gWimpyMode)
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
p[j] = (uint32_t)i + j * scale;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
p[j] = (uint32_t)i + j;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
@@ -187,7 +185,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -212,7 +210,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||
@@ -251,11 +249,11 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
float *r = (float *)gOut_Ref;
|
||||
int *r2 = (int *)gOut_Ref2;
|
||||
float *s = (float *)gIn;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
r[j] = (float)f->func.f_fpI(s[j], r2 + j);
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -278,9 +276,9 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
// Verify data
|
||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||
int32_t *t2 = (int32_t *)gOut_Ref2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||
int32_t *q2 = (int32_t *)(gOut2[k]);
|
||||
@@ -407,7 +405,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -126,8 +126,6 @@ static cl_ulong random64(MTdata d)
|
||||
|
||||
int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
cl_program programs[VECTOR_SIZE_COUNT];
|
||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||
@@ -150,11 +148,12 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
||||
return error;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_ulong *p = (cl_ulong *)gIn;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_ulong); j++) p[j] = random64(d);
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_ulong); j++)
|
||||
p[j] = random64(d);
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
@@ -164,7 +163,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -179,7 +178,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||
@@ -211,11 +210,11 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
||||
// Calculate the correctly rounded reference result
|
||||
double *r = (double *)gOut_Ref;
|
||||
cl_ulong *s = (cl_ulong *)gIn;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
r[j] = (double)f->dfunc.f_u(s[j]);
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -230,9 +229,9 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// Verify data
|
||||
uint64_t *t = (uint64_t *)gOut_Ref;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint64_t *q = (uint64_t *)(gOut[k]);
|
||||
|
||||
@@ -306,7 +305,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
@@ -118,8 +118,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
uint64_t i;
|
||||
uint32_t j, k;
|
||||
int error;
|
||||
cl_program programs[VECTOR_SIZE_COUNT];
|
||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||
@@ -165,18 +163,18 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
|
||||
for (i = 0; i < (1ULL << 32); i += step)
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
uint32_t *p = (uint32_t *)gIn;
|
||||
if (gWimpyMode)
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
p[j] = (uint32_t)i + j * scale;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
p[j] = (uint32_t)i + j;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
@@ -187,7 +185,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
@@ -202,7 +200,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||
@@ -234,11 +232,11 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
||||
// Calculate the correctly rounded reference result
|
||||
float *r = (float *)gOut_Ref;
|
||||
cl_uint *s = (cl_uint *)gIn;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
r[j] = (float)f->func.f_u(s[j]);
|
||||
|
||||
// Read the data back
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
@@ -254,9 +252,9 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
// Verify data
|
||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||
|
||||
@@ -339,7 +337,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
||||
|
||||
exit:
|
||||
// Release
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
clReleaseKernel(kernels[k]);
|
||||
clReleaseProgram(programs[k]);
|
||||
|
||||
Reference in New Issue
Block a user