mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-26 08:49:02 +00:00
Reduce scope of variables (#1228)
Make variables local to loops, with appropriate types. These variables are not read after the loop without being reset first, so this patch doesn't change behaviour.

These variables should now be used for one purpose only, making it easier to reason about the code. This will make future refactoring easier.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
@@ -286,7 +286,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
float maxError = 0.0f;
|
float maxError = 0.0f;
|
||||||
double maxErrorVal = 0.0;
|
double maxErrorVal = 0.0;
|
||||||
double maxErrorVal2 = 0.0;
|
double maxErrorVal2 = 0.0;
|
||||||
@@ -321,7 +320,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -344,7 +343,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_double),
|
i * test_info.subBufferSize * sizeof(cl_double),
|
||||||
@@ -371,7 +370,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -413,7 +412,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
// Accumulate the arithmetic errors
|
// Accumulate the arithmetic errors
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
if (test_info.tinfo[i].maxError > maxError)
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
{
|
{
|
||||||
@@ -437,12 +436,12 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -450,12 +449,12 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
free_mtdata(test_info.tinfo[i].d);
|
free_mtdata(test_info.tinfo[i].d);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -477,7 +476,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
dptr func = job->f->dfunc;
|
dptr func = job->f->dfunc;
|
||||||
int ftz = job->ftz;
|
int ftz = job->ftz;
|
||||||
MTdata d = tinfo->d;
|
MTdata d = tinfo->d;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
const char *name = job->f->name;
|
const char *name = job->f->name;
|
||||||
|
|
||||||
@@ -492,7 +490,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_ulong *out[VECTOR_SIZE_COUNT];
|
cl_ulong *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -511,11 +509,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Init input array
|
// Init input array
|
||||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||||
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
|
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
|
||||||
j = 0;
|
cl_uint idx = 0;
|
||||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
if (job_id <= (cl_uint)indx)
|
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||||
{ // test edge cases
|
{ // test edge cases
|
||||||
cl_double *fp = (cl_double *)p;
|
cl_double *fp = (cl_double *)p;
|
||||||
cl_double *fp2 = (cl_double *)p2;
|
cl_double *fp2 = (cl_double *)p2;
|
||||||
@@ -524,10 +522,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
x = (job_id * buffer_elements) % specialValuesCount;
|
x = (job_id * buffer_elements) % specialValuesCount;
|
||||||
y = (job_id * buffer_elements) / specialValuesCount;
|
y = (job_id * buffer_elements) / specialValuesCount;
|
||||||
|
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
fp[j] = specialValues[x];
|
fp[idx] = specialValues[x];
|
||||||
fp2[j] = specialValues[y];
|
fp2[idx] = specialValues[y];
|
||||||
if (++x >= specialValuesCount)
|
if (++x >= specialValuesCount)
|
||||||
{
|
{
|
||||||
x = 0;
|
x = 0;
|
||||||
@@ -538,10 +536,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Init any remaining values.
|
// Init any remaining values.
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
p[j] = genrand_int64(d);
|
p[idx] = genrand_int64(d);
|
||||||
p2[j] = genrand_int64(d);
|
p2[idx] = genrand_int64(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
@@ -558,7 +556,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -626,12 +624,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||||
s = (cl_double *)gIn + thread_id * buffer_elements;
|
s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||||
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
r[j] = (cl_double)func.f_ff(s[j], s2[j]);
|
r[j] = (cl_double)func.f_ff(s[j], s2[j]);
|
||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||||
@@ -647,9 +645,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
t = (cl_ulong *)r;
|
t = (cl_ulong *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
cl_ulong *q = out[k];
|
cl_ulong *q = out[k];
|
||||||
|
|
||||||
@@ -794,7 +792,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -276,7 +276,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
float maxError = 0.0f;
|
float maxError = 0.0f;
|
||||||
double maxErrorVal = 0.0;
|
double maxErrorVal = 0.0;
|
||||||
double maxErrorVal2 = 0.0;
|
double maxErrorVal2 = 0.0;
|
||||||
@@ -313,7 +312,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -336,7 +335,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_float),
|
i * test_info.subBufferSize * sizeof(cl_float),
|
||||||
@@ -363,7 +362,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -405,7 +404,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
// Accumulate the arithmetic errors
|
// Accumulate the arithmetic errors
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
if (test_info.tinfo[i].maxError > maxError)
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
{
|
{
|
||||||
@@ -429,12 +428,12 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -442,12 +441,12 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
free_mtdata(test_info.tinfo[i].d);
|
free_mtdata(test_info.tinfo[i].d);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -470,7 +469,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
bool relaxedMode = job->relaxedMode;
|
bool relaxedMode = job->relaxedMode;
|
||||||
float ulps = getAllowedUlpError(job->f, relaxedMode);
|
float ulps = getAllowedUlpError(job->f, relaxedMode);
|
||||||
MTdata d = tinfo->d;
|
MTdata d = tinfo->d;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
|
cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
|
||||||
const char *name = job->f->name;
|
const char *name = job->f->name;
|
||||||
@@ -498,7 +496,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_uint *out[VECTOR_SIZE_COUNT];
|
cl_uint *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -517,12 +515,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Init input array
|
// Init input array
|
||||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||||
j = 0;
|
cl_uint idx = 0;
|
||||||
|
|
||||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
if (job_id <= (cl_uint)indx)
|
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||||
{ // test edge cases
|
{ // test edge cases
|
||||||
float *fp = (float *)p;
|
float *fp = (float *)p;
|
||||||
float *fp2 = (float *)p2;
|
float *fp2 = (float *)p2;
|
||||||
@@ -531,10 +528,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
x = (job_id * buffer_elements) % specialValuesCount;
|
x = (job_id * buffer_elements) % specialValuesCount;
|
||||||
y = (job_id * buffer_elements) / specialValuesCount;
|
y = (job_id * buffer_elements) / specialValuesCount;
|
||||||
|
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
fp[j] = specialValues[x];
|
fp[idx] = specialValues[x];
|
||||||
fp2[j] = specialValues[y];
|
fp2[idx] = specialValues[y];
|
||||||
++x;
|
++x;
|
||||||
if (x >= specialValuesCount)
|
if (x >= specialValuesCount)
|
||||||
{
|
{
|
||||||
@@ -546,10 +543,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Init any remaining values.
|
// Init any remaining values.
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
p[j] = genrand_int32(d);
|
p[idx] = genrand_int32(d);
|
||||||
p2[j] = genrand_int32(d);
|
p2[idx] = genrand_int32(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
@@ -566,7 +563,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -661,7 +658,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
s2 = (float *)gIn2 + thread_id * buffer_elements;
|
s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||||
if (skipNanInf)
|
if (skipNanInf)
|
||||||
{
|
{
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
feclearexcept(FE_OVERFLOW);
|
feclearexcept(FE_OVERFLOW);
|
||||||
r[j] = (float)ref_func(s[j], s2[j]);
|
r[j] = (float)ref_func(s[j], s2[j]);
|
||||||
@@ -671,7 +668,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
r[j] = (float)ref_func(s[j], s2[j]);
|
r[j] = (float)ref_func(s[j], s2[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -679,7 +676,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||||
@@ -697,9 +694,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
{
|
{
|
||||||
// Verify data
|
// Verify data
|
||||||
t = (cl_uint *)r;
|
t = (cl_uint *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
cl_uint *q = out[k];
|
cl_uint *q = out[k];
|
||||||
|
|
||||||
@@ -956,7 +953,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
if (isFDim && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
if (isFDim && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -288,7 +288,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
float maxError = 0.0f;
|
float maxError = 0.0f;
|
||||||
double maxErrorVal = 0.0;
|
double maxErrorVal = 0.0;
|
||||||
cl_int maxErrorVal2 = 0;
|
cl_int maxErrorVal2 = 0;
|
||||||
@@ -319,7 +318,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -342,7 +341,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_double),
|
i * test_info.subBufferSize * sizeof(cl_double),
|
||||||
@@ -372,7 +371,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -414,7 +413,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
// Accumulate the arithmetic errors
|
// Accumulate the arithmetic errors
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
if (test_info.tinfo[i].maxError > maxError)
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
{
|
{
|
||||||
@@ -438,12 +437,12 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -451,12 +450,12 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
free_mtdata(test_info.tinfo[i].d);
|
free_mtdata(test_info.tinfo[i].d);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -478,7 +477,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
dptr func = job->f->dfunc;
|
dptr func = job->f->dfunc;
|
||||||
int ftz = job->ftz;
|
int ftz = job->ftz;
|
||||||
MTdata d = tinfo->d;
|
MTdata d = tinfo->d;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
const char *name = job->f->name;
|
const char *name = job->f->name;
|
||||||
cl_ulong *t;
|
cl_ulong *t;
|
||||||
@@ -491,7 +489,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_ulong *out[VECTOR_SIZE_COUNT];
|
cl_ulong *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -510,11 +508,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Init input array
|
// Init input array
|
||||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||||
cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||||
j = 0;
|
size_t idx = 0;
|
||||||
int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
|
int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
|
||||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
if (job_id <= (cl_uint)indx)
|
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||||
{ // test edge cases
|
{ // test edge cases
|
||||||
cl_double *fp = (cl_double *)p;
|
cl_double *fp = (cl_double *)p;
|
||||||
cl_int *ip2 = (cl_int *)p2;
|
cl_int *ip2 = (cl_int *)p2;
|
||||||
@@ -523,10 +521,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
x = (job_id * buffer_elements) % specialValuesCount;
|
x = (job_id * buffer_elements) % specialValuesCount;
|
||||||
y = (job_id * buffer_elements) / specialValuesCount;
|
y = (job_id * buffer_elements) / specialValuesCount;
|
||||||
|
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
fp[j] = specialValues[x];
|
fp[idx] = specialValues[x];
|
||||||
ip2[j] = specialValuesInt[y];
|
ip2[idx] = specialValuesInt[y];
|
||||||
if (++x >= specialValuesCount)
|
if (++x >= specialValuesCount)
|
||||||
{
|
{
|
||||||
x = 0;
|
x = 0;
|
||||||
@@ -537,10 +535,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Init any remaining values.
|
// Init any remaining values.
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
p[idx] = DoubleFromUInt32(genrand_int32(d));
|
||||||
p2[j] = genrand_int32(d);
|
p2[idx] = genrand_int32(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
@@ -557,7 +555,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -625,12 +623,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||||
s = (cl_double *)gIn + thread_id * buffer_elements;
|
s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||||
s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
r[j] = (cl_double)func.f_fi(s[j], s2[j]);
|
r[j] = (cl_double)func.f_fi(s[j], s2[j]);
|
||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||||
@@ -646,9 +644,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
t = (cl_ulong *)r;
|
t = (cl_ulong *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
cl_ulong *q = out[k];
|
cl_ulong *q = out[k];
|
||||||
|
|
||||||
@@ -713,7 +711,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -280,7 +280,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
float maxError = 0.0f;
|
float maxError = 0.0f;
|
||||||
double maxErrorVal = 0.0;
|
double maxErrorVal = 0.0;
|
||||||
cl_int maxErrorVal2 = 0;
|
cl_int maxErrorVal2 = 0;
|
||||||
@@ -312,7 +311,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -335,7 +334,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_float),
|
i * test_info.subBufferSize * sizeof(cl_float),
|
||||||
@@ -365,7 +364,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -407,7 +406,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
// Accumulate the arithmetic errors
|
// Accumulate the arithmetic errors
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
if (test_info.tinfo[i].maxError > maxError)
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
{
|
{
|
||||||
@@ -431,12 +430,12 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -444,12 +443,12 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
free_mtdata(test_info.tinfo[i].d);
|
free_mtdata(test_info.tinfo[i].d);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -471,7 +470,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
int ftz = job->ftz;
|
int ftz = job->ftz;
|
||||||
float ulps = job->ulps;
|
float ulps = job->ulps;
|
||||||
MTdata d = tinfo->d;
|
MTdata d = tinfo->d;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
const char *name = job->f->name;
|
const char *name = job->f->name;
|
||||||
cl_uint *t = 0;
|
cl_uint *t = 0;
|
||||||
@@ -482,7 +480,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_uint *out[VECTOR_SIZE_COUNT];
|
cl_uint *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -501,12 +499,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Init input array
|
// Init input array
|
||||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||||
j = 0;
|
size_t idx = 0;
|
||||||
|
|
||||||
int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
|
int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
|
||||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
if (job_id <= (cl_uint)indx)
|
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||||
{ // test edge cases
|
{ // test edge cases
|
||||||
float *fp = (float *)p;
|
float *fp = (float *)p;
|
||||||
cl_int *ip2 = (cl_int *)p2;
|
cl_int *ip2 = (cl_int *)p2;
|
||||||
@@ -515,10 +512,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
x = (job_id * buffer_elements) % specialValuesCount;
|
x = (job_id * buffer_elements) % specialValuesCount;
|
||||||
y = (job_id * buffer_elements) / specialValuesCount;
|
y = (job_id * buffer_elements) / specialValuesCount;
|
||||||
|
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
fp[j] = specialValues[x];
|
fp[idx] = specialValues[x];
|
||||||
ip2[j] = specialValuesInt[y];
|
ip2[idx] = specialValuesInt[y];
|
||||||
++x;
|
++x;
|
||||||
if (x >= specialValuesCount)
|
if (x >= specialValuesCount)
|
||||||
{
|
{
|
||||||
@@ -530,10 +527,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Init any remaining values.
|
// Init any remaining values.
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
p[j] = genrand_int32(d);
|
p[idx] = genrand_int32(d);
|
||||||
p2[j] = genrand_int32(d);
|
p2[idx] = genrand_int32(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
@@ -550,7 +547,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -618,11 +615,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
r = (float *)gOut_Ref + thread_id * buffer_elements;
|
r = (float *)gOut_Ref + thread_id * buffer_elements;
|
||||||
s = (float *)gIn + thread_id * buffer_elements;
|
s = (float *)gIn + thread_id * buffer_elements;
|
||||||
s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||||
for (j = 0; j < buffer_elements; j++) r[j] = (float)func.f_fi(s[j], s2[j]);
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
|
r[j] = (float)func.f_fi(s[j], s2[j]);
|
||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||||
@@ -638,9 +636,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
t = (cl_uint *)r;
|
t = (cl_uint *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
cl_uint *q = out[k];
|
cl_uint *q = out[k];
|
||||||
|
|
||||||
@@ -707,7 +705,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -284,7 +284,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
float maxError = 0.0f;
|
float maxError = 0.0f;
|
||||||
double maxErrorVal = 0.0;
|
double maxErrorVal = 0.0;
|
||||||
double maxErrorVal2 = 0.0;
|
double maxErrorVal2 = 0.0;
|
||||||
@@ -315,7 +314,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -338,7 +337,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_double),
|
i * test_info.subBufferSize * sizeof(cl_double),
|
||||||
@@ -365,7 +364,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -407,7 +406,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
|||||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
// Accumulate the arithmetic errors
|
// Accumulate the arithmetic errors
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
if (test_info.tinfo[i].maxError > maxError)
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
{
|
{
|
||||||
@@ -431,12 +430,12 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -444,12 +443,12 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
free_mtdata(test_info.tinfo[i].d);
|
free_mtdata(test_info.tinfo[i].d);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -472,7 +471,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
int ftz = job->ftz;
|
int ftz = job->ftz;
|
||||||
bool relaxedMode = job->relaxedMode;
|
bool relaxedMode = job->relaxedMode;
|
||||||
MTdata d = tinfo->d;
|
MTdata d = tinfo->d;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
const char *name = job->f->name;
|
const char *name = job->f->name;
|
||||||
cl_ulong *t;
|
cl_ulong *t;
|
||||||
@@ -485,7 +483,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_ulong *out[VECTOR_SIZE_COUNT];
|
cl_ulong *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -504,11 +502,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Init input array
|
// Init input array
|
||||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||||
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
|
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
|
||||||
j = 0;
|
cl_uint idx = 0;
|
||||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
if (job_id <= (cl_uint)indx)
|
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||||
{ // test edge cases
|
{ // test edge cases
|
||||||
cl_double *fp = (cl_double *)p;
|
cl_double *fp = (cl_double *)p;
|
||||||
cl_double *fp2 = (cl_double *)p2;
|
cl_double *fp2 = (cl_double *)p2;
|
||||||
@@ -517,10 +515,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
x = (job_id * buffer_elements) % specialValuesCount;
|
x = (job_id * buffer_elements) % specialValuesCount;
|
||||||
y = (job_id * buffer_elements) / specialValuesCount;
|
y = (job_id * buffer_elements) / specialValuesCount;
|
||||||
|
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
fp[j] = specialValues[x];
|
fp[idx] = specialValues[x];
|
||||||
fp2[j] = specialValues[y];
|
fp2[idx] = specialValues[y];
|
||||||
if (++x >= specialValuesCount)
|
if (++x >= specialValuesCount)
|
||||||
{
|
{
|
||||||
x = 0;
|
x = 0;
|
||||||
@@ -531,10 +529,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Init any remaining values.
|
// Init any remaining values.
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
p[j] = genrand_int64(d);
|
p[idx] = genrand_int64(d);
|
||||||
p2[j] = genrand_int64(d);
|
p2[idx] = genrand_int64(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
@@ -551,7 +549,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -619,12 +617,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||||
s = (cl_double *)gIn + thread_id * buffer_elements;
|
s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||||
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
r[j] = (cl_double)func.f_ff(s[j], s2[j]);
|
r[j] = (cl_double)func.f_ff(s[j], s2[j]);
|
||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||||
@@ -640,9 +638,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
t = (cl_ulong *)r;
|
t = (cl_ulong *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
cl_ulong *q = out[k];
|
cl_ulong *q = out[k];
|
||||||
|
|
||||||
@@ -763,7 +761,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -274,7 +274,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
float maxError = 0.0f;
|
float maxError = 0.0f;
|
||||||
double maxErrorVal = 0.0;
|
double maxErrorVal = 0.0;
|
||||||
double maxErrorVal2 = 0.0;
|
double maxErrorVal2 = 0.0;
|
||||||
@@ -307,7 +306,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -330,7 +329,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_float),
|
i * test_info.subBufferSize * sizeof(cl_float),
|
||||||
@@ -357,7 +356,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -399,7 +398,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
|||||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
// Accumulate the arithmetic errors
|
// Accumulate the arithmetic errors
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
if (test_info.tinfo[i].maxError > maxError)
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
{
|
{
|
||||||
@@ -423,12 +422,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -436,12 +435,12 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
free_mtdata(test_info.tinfo[i].d);
|
free_mtdata(test_info.tinfo[i].d);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -464,7 +463,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
bool relaxedMode = job->relaxedMode;
|
bool relaxedMode = job->relaxedMode;
|
||||||
float ulps = getAllowedUlpError(job->f, relaxedMode);
|
float ulps = getAllowedUlpError(job->f, relaxedMode);
|
||||||
MTdata d = tinfo->d;
|
MTdata d = tinfo->d;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
|
cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
|
||||||
const char *name = job->f->name;
|
const char *name = job->f->name;
|
||||||
@@ -482,7 +480,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_uint *out[VECTOR_SIZE_COUNT];
|
cl_uint *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -501,12 +499,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Init input array
|
// Init input array
|
||||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||||
j = 0;
|
cl_uint idx = 0;
|
||||||
|
|
||||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
if (job_id <= (cl_uint)indx)
|
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||||
{
|
{
|
||||||
// Insert special values
|
// Insert special values
|
||||||
uint32_t x, y;
|
uint32_t x, y;
|
||||||
@@ -514,10 +511,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
x = (job_id * buffer_elements) % specialValuesCount;
|
x = (job_id * buffer_elements) % specialValuesCount;
|
||||||
y = (job_id * buffer_elements) / specialValuesCount;
|
y = (job_id * buffer_elements) / specialValuesCount;
|
||||||
|
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
p[j] = ((cl_uint *)specialValues)[x];
|
p[idx] = ((cl_uint *)specialValues)[x];
|
||||||
p2[j] = ((cl_uint *)specialValues)[y];
|
p2[idx] = ((cl_uint *)specialValues)[y];
|
||||||
++x;
|
++x;
|
||||||
if (x >= specialValuesCount)
|
if (x >= specialValuesCount)
|
||||||
{
|
{
|
||||||
@@ -527,28 +524,28 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
if (relaxedMode && strcmp(name, "divide") == 0)
|
if (relaxedMode && strcmp(name, "divide") == 0)
|
||||||
{
|
{
|
||||||
cl_uint pj = p[j] & 0x7fffffff;
|
cl_uint pj = p[idx] & 0x7fffffff;
|
||||||
cl_uint p2j = p2[j] & 0x7fffffff;
|
cl_uint p2j = p2[idx] & 0x7fffffff;
|
||||||
// Replace values outside [2^-62, 2^62] with QNaN
|
// Replace values outside [2^-62, 2^62] with QNaN
|
||||||
if (pj < 0x20800000 || pj > 0x5e800000) p[j] = 0x7fc00000;
|
if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
|
||||||
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[j] = 0x7fc00000;
|
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init any remaining values.
|
// Init any remaining values.
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
p[j] = genrand_int32(d);
|
p[idx] = genrand_int32(d);
|
||||||
p2[j] = genrand_int32(d);
|
p2[idx] = genrand_int32(d);
|
||||||
|
|
||||||
if (relaxedMode && strcmp(name, "divide") == 0)
|
if (relaxedMode && strcmp(name, "divide") == 0)
|
||||||
{
|
{
|
||||||
cl_uint pj = p[j] & 0x7fffffff;
|
cl_uint pj = p[idx] & 0x7fffffff;
|
||||||
cl_uint p2j = p2[j] & 0x7fffffff;
|
cl_uint p2j = p2[idx] & 0x7fffffff;
|
||||||
// Replace values outside [2^-62, 2^62] with QNaN
|
// Replace values outside [2^-62, 2^62] with QNaN
|
||||||
if (pj < 0x20800000 || pj > 0x5e800000) p[j] = 0x7fc00000;
|
if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
|
||||||
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[j] = 0x7fc00000;
|
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -566,7 +563,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -649,12 +646,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
s2 = (float *)gIn2 + thread_id * buffer_elements;
|
s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||||
if (gInfNanSupport)
|
if (gInfNanSupport)
|
||||||
{
|
{
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
r[j] = (float)func.f_ff(s[j], s2[j]);
|
r[j] = (float)func.f_ff(s[j], s2[j]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
feclearexcept(FE_OVERFLOW);
|
feclearexcept(FE_OVERFLOW);
|
||||||
r[j] = (float)func.f_ff(s[j], s2[j]);
|
r[j] = (float)func.f_ff(s[j], s2[j]);
|
||||||
@@ -669,7 +666,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||||
@@ -685,9 +682,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
t = (cl_uint *)r;
|
t = (cl_uint *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
cl_uint *q = out[k];
|
cl_uint *q = out[k];
|
||||||
|
|
||||||
@@ -892,7 +889,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -154,13 +154,12 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
|
|||||||
double *r = cri->r + off;
|
double *r = cri->r + off;
|
||||||
int *i = cri->i + off;
|
int *i = cri->i + off;
|
||||||
long double (*f)(long double, long double, int *) = cri->f_ffpI;
|
long double (*f)(long double, long double, int *) = cri->f_ffpI;
|
||||||
cl_uint j;
|
|
||||||
|
|
||||||
if (off + count > lim) count = lim - off;
|
if (off + count > lim) count = lim - off;
|
||||||
|
|
||||||
Force64BitFPUPrecision();
|
Force64BitFPUPrecision();
|
||||||
|
|
||||||
for (j = 0; j < count; ++j)
|
for (cl_uint j = 0; j < count; ++j)
|
||||||
r[j] = (double)f((long double)x[j], (long double)y[j], i + j);
|
r[j] = (double)f((long double)x[j], (long double)y[j], i + j);
|
||||||
|
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
@@ -168,8 +167,6 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
|
|||||||
|
|
||||||
int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[VECTOR_SIZE_COUNT];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
@@ -198,12 +195,12 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
double *p = (double *)gIn;
|
double *p = (double *)gIn;
|
||||||
double *p2 = (double *)gIn2;
|
double *p2 = (double *)gIn2;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||||
{
|
{
|
||||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||||
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
||||||
@@ -224,7 +221,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -249,7 +246,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||||
@@ -311,12 +308,12 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
double *r = (double *)gOut_Ref;
|
double *r = (double *)gOut_Ref;
|
||||||
int *r2 = (int *)gOut_Ref2;
|
int *r2 = (int *)gOut_Ref2;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||||
r[j] = (double)f->dfunc.f_ffpI(s[j], s2[j], r2 + j);
|
r[j] = (double)f->dfunc.f_ffpI(s[j], s2[j], r2 + j);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -339,9 +336,9 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Verify data
|
// Verify data
|
||||||
uint64_t *t = (uint64_t *)gOut_Ref;
|
uint64_t *t = (uint64_t *)gOut_Ref;
|
||||||
int32_t *t2 = (int32_t *)gOut_Ref2;
|
int32_t *t2 = (int32_t *)gOut_Ref2;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint64_t *q = (uint64_t *)gOut[k];
|
uint64_t *q = (uint64_t *)gOut[k];
|
||||||
int32_t *q2 = (int32_t *)gOut2[k];
|
int32_t *q2 = (int32_t *)gOut2[k];
|
||||||
@@ -572,7 +569,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -152,11 +152,10 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
|
|||||||
float *r = cri->r + off;
|
float *r = cri->r + off;
|
||||||
int *i = cri->i + off;
|
int *i = cri->i + off;
|
||||||
double (*f)(double, double, int *) = cri->f_ffpI;
|
double (*f)(double, double, int *) = cri->f_ffpI;
|
||||||
cl_uint j;
|
|
||||||
|
|
||||||
if (off + count > lim) count = lim - off;
|
if (off + count > lim) count = lim - off;
|
||||||
|
|
||||||
for (j = 0; j < count; ++j)
|
for (cl_uint j = 0; j < count; ++j)
|
||||||
r[j] = (float)f((double)x[j], (double)y[j], i + j);
|
r[j] = (float)f((double)x[j], (double)y[j], i + j);
|
||||||
|
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
@@ -164,8 +163,6 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
|
|||||||
|
|
||||||
int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||||
@@ -199,12 +196,12 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
cl_uint *p = (cl_uint *)gIn;
|
cl_uint *p = (cl_uint *)gIn;
|
||||||
cl_uint *p2 = (cl_uint *)gIn2;
|
cl_uint *p2 = (cl_uint *)gIn2;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
p[j] = genrand_int32(d);
|
p[j] = genrand_int32(d);
|
||||||
p2[j] = genrand_int32(d);
|
p2[j] = genrand_int32(d);
|
||||||
@@ -225,7 +222,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -250,7 +247,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||||
@@ -312,12 +309,12 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
float *r = (float *)gOut_Ref;
|
float *r = (float *)gOut_Ref;
|
||||||
int *r2 = (int *)gOut_Ref2;
|
int *r2 = (int *)gOut_Ref2;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
r[j] = (float)f->func.f_ffpI(s[j], s2[j], r2 + j);
|
r[j] = (float)f->func.f_ffpI(s[j], s2[j], r2 + j);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -340,9 +337,9 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Verify data
|
// Verify data
|
||||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||||
int32_t *t2 = (int32_t *)gOut_Ref2;
|
int32_t *t2 = (int32_t *)gOut_Ref2;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||||
int32_t *q2 = (int32_t *)gOut2[k];
|
int32_t *q2 = (int32_t *)gOut2[k];
|
||||||
@@ -557,7 +554,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -119,8 +119,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
|||||||
|
|
||||||
int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[VECTOR_SIZE_COUNT];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
@@ -148,18 +146,18 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
double *p = (double *)gIn;
|
double *p = (double *)gIn;
|
||||||
if (gWimpyMode)
|
if (gWimpyMode)
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
|
p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
p[j] = DoubleFromUInt32((uint32_t)i + j);
|
p[j] = DoubleFromUInt32((uint32_t)i + j);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -171,7 +169,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -186,7 +184,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||||
@@ -219,11 +217,11 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Calculate the correctly rounded reference result
|
// Calculate the correctly rounded reference result
|
||||||
int *r = (int *)gOut_Ref;
|
int *r = (int *)gOut_Ref;
|
||||||
double *s = (double *)gIn;
|
double *s = (double *)gIn;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
r[j] = f->dfunc.i_f(s[j]);
|
r[j] = f->dfunc.i_f(s[j]);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -238,9 +236,9 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||||
// If we aren't getting the correctly rounded result
|
// If we aren't getting the correctly rounded result
|
||||||
@@ -294,7 +292,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
exit:
|
exit:
|
||||||
RestoreFPState(&oldMode);
|
RestoreFPState(&oldMode);
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -117,8 +117,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
|||||||
|
|
||||||
int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[VECTOR_SIZE_COUNT];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
@@ -145,18 +143,18 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
cl_uint *p = (cl_uint *)gIn;
|
cl_uint *p = (cl_uint *)gIn;
|
||||||
if (gWimpyMode)
|
if (gWimpyMode)
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
p[j] = (cl_uint)i + j * scale;
|
p[j] = (cl_uint)i + j * scale;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
p[j] = (uint32_t)i + j;
|
p[j] = (uint32_t)i + j;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -168,7 +166,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -183,7 +181,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||||
@@ -216,11 +214,11 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Calculate the correctly rounded reference result
|
// Calculate the correctly rounded reference result
|
||||||
int *r = (int *)gOut_Ref;
|
int *r = (int *)gOut_Ref;
|
||||||
float *s = (float *)gIn;
|
float *s = (float *)gIn;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
r[j] = f->func.i_f(s[j]);
|
r[j] = f->func.i_f(s[j]);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -235,9 +233,9 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||||
// If we aren't getting the correctly rounded result
|
// If we aren't getting the correctly rounded result
|
||||||
@@ -290,7 +288,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
exit:
|
exit:
|
||||||
RestoreFPState(&oldMode);
|
RestoreFPState(&oldMode);
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -273,7 +273,6 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
|
|
||||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||||
|
|
||||||
@@ -300,7 +299,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -323,7 +322,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (size_t i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_double),
|
i * test_info.subBufferSize * sizeof(cl_double),
|
||||||
@@ -350,7 +349,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -403,12 +402,12 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -416,12 +415,12 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
free_mtdata(test_info.tinfo[i].d);
|
free_mtdata(test_info.tinfo[i].d);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -442,7 +441,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
dptr dfunc = job->f->dfunc;
|
dptr dfunc = job->f->dfunc;
|
||||||
int ftz = job->ftz;
|
int ftz = job->ftz;
|
||||||
MTdata d = tinfo->d;
|
MTdata d = tinfo->d;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
const char *name = job->f->name;
|
const char *name = job->f->name;
|
||||||
cl_long *t;
|
cl_long *t;
|
||||||
@@ -455,7 +453,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_long *out[VECTOR_SIZE_COUNT];
|
cl_long *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_long *)clEnqueueMapBuffer(
|
out[j] = (cl_long *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -474,21 +472,21 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Init input array
|
// Init input array
|
||||||
double *p = (double *)gIn + thread_id * buffer_elements;
|
double *p = (double *)gIn + thread_id * buffer_elements;
|
||||||
double *p2 = (double *)gIn2 + thread_id * buffer_elements;
|
double *p2 = (double *)gIn2 + thread_id * buffer_elements;
|
||||||
j = 0;
|
cl_uint idx = 0;
|
||||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
if (job_id <= (cl_uint)indx)
|
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||||
{ // test edge cases
|
{ // test edge cases
|
||||||
uint32_t x, y;
|
uint32_t x, y;
|
||||||
|
|
||||||
x = (job_id * buffer_elements) % specialValuesCount;
|
x = (job_id * buffer_elements) % specialValuesCount;
|
||||||
y = (job_id * buffer_elements) / specialValuesCount;
|
y = (job_id * buffer_elements) / specialValuesCount;
|
||||||
|
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
p[j] = specialValues[x];
|
p[idx] = specialValues[x];
|
||||||
p2[j] = specialValues[y];
|
p2[idx] = specialValues[y];
|
||||||
if (++x >= specialValuesCount)
|
if (++x >= specialValuesCount)
|
||||||
{
|
{
|
||||||
x = 0;
|
x = 0;
|
||||||
@@ -499,10 +497,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Init any remaining values.
|
// Init any remaining values.
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
((cl_ulong *)p)[j] = genrand_int64(d);
|
((cl_ulong *)p)[idx] = genrand_int64(d);
|
||||||
((cl_ulong *)p2)[j] = genrand_int64(d);
|
((cl_ulong *)p2)[idx] = genrand_int64(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
@@ -519,7 +517,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -587,11 +585,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
|
r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
|
||||||
s = (cl_double *)gIn + thread_id * buffer_elements;
|
s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||||
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||||
for (j = 0; j < buffer_elements; j++) r[j] = dfunc.i_ff(s[j], s2[j]);
|
for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_ff(s[j], s2[j]);
|
||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_long *)clEnqueueMapBuffer(
|
out[j] = (cl_long *)clEnqueueMapBuffer(
|
||||||
@@ -607,7 +605,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
t = (cl_long *)r;
|
t = (cl_long *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
cl_long *q = out[0];
|
cl_long *q = out[0];
|
||||||
|
|
||||||
@@ -656,7 +654,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
q = (cl_long *)out[k];
|
q = (cl_long *)out[k];
|
||||||
// If we aren't getting the correctly rounded result
|
// If we aren't getting the correctly rounded result
|
||||||
@@ -704,7 +702,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -263,7 +263,6 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
|
|
||||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||||
|
|
||||||
@@ -291,7 +290,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -314,7 +313,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_float),
|
i * test_info.subBufferSize * sizeof(cl_float),
|
||||||
@@ -341,7 +340,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -394,12 +393,12 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -407,12 +406,12 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
free_mtdata(test_info.tinfo[i].d);
|
free_mtdata(test_info.tinfo[i].d);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
clReleaseMemObject(test_info.tinfo[i].inBuf2);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -433,7 +432,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
fptr func = job->f->func;
|
fptr func = job->f->func;
|
||||||
int ftz = job->ftz;
|
int ftz = job->ftz;
|
||||||
MTdata d = tinfo->d;
|
MTdata d = tinfo->d;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
const char *name = job->f->name;
|
const char *name = job->f->name;
|
||||||
cl_int *t = 0;
|
cl_int *t = 0;
|
||||||
@@ -444,7 +442,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_int *out[VECTOR_SIZE_COUNT];
|
cl_int *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_int *)clEnqueueMapBuffer(
|
out[j] = (cl_int *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -463,12 +461,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Init input array
|
// Init input array
|
||||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||||
j = 0;
|
cl_uint idx = 0;
|
||||||
|
|
||||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
if (job_id <= (cl_uint)indx)
|
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||||
{ // test edge cases
|
{ // test edge cases
|
||||||
float *fp = (float *)p;
|
float *fp = (float *)p;
|
||||||
float *fp2 = (float *)p2;
|
float *fp2 = (float *)p2;
|
||||||
@@ -477,10 +475,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
x = (job_id * buffer_elements) % specialValuesCount;
|
x = (job_id * buffer_elements) % specialValuesCount;
|
||||||
y = (job_id * buffer_elements) / specialValuesCount;
|
y = (job_id * buffer_elements) / specialValuesCount;
|
||||||
|
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
fp[j] = specialValues[x];
|
fp[idx] = specialValues[x];
|
||||||
fp2[j] = specialValues[y];
|
fp2[idx] = specialValues[y];
|
||||||
++x;
|
++x;
|
||||||
if (x >= specialValuesCount)
|
if (x >= specialValuesCount)
|
||||||
{
|
{
|
||||||
@@ -492,10 +490,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Init any remaining values.
|
// Init any remaining values.
|
||||||
for (; j < buffer_elements; j++)
|
for (; idx < buffer_elements; idx++)
|
||||||
{
|
{
|
||||||
p[j] = genrand_int32(d);
|
p[idx] = genrand_int32(d);
|
||||||
p2[j] = genrand_int32(d);
|
p2[idx] = genrand_int32(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
@@ -512,7 +510,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -580,11 +578,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
|
r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
|
||||||
s = (float *)gIn + thread_id * buffer_elements;
|
s = (float *)gIn + thread_id * buffer_elements;
|
||||||
s2 = (float *)gIn2 + thread_id * buffer_elements;
|
s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||||
for (j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]);
|
for (size_t j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]);
|
||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_int *)clEnqueueMapBuffer(
|
out[j] = (cl_int *)clEnqueueMapBuffer(
|
||||||
@@ -600,7 +598,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
t = (cl_int *)r;
|
t = (cl_int *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
cl_int *q = out[0];
|
cl_int *q = out[0];
|
||||||
|
|
||||||
@@ -646,7 +644,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
q = out[k];
|
q = out[k];
|
||||||
// If we aren't getting the correctly rounded result
|
// If we aren't getting the correctly rounded result
|
||||||
@@ -693,7 +691,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -151,7 +151,6 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
|
|
||||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||||
|
|
||||||
@@ -178,7 +177,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -201,7 +200,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_double),
|
i * test_info.subBufferSize * sizeof(cl_double),
|
||||||
@@ -218,7 +217,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -269,12 +268,12 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -282,10 +281,10 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -306,7 +305,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
ThreadInfo *tinfo = job->tinfo + thread_id;
|
ThreadInfo *tinfo = job->tinfo + thread_id;
|
||||||
dptr dfunc = job->f->dfunc;
|
dptr dfunc = job->f->dfunc;
|
||||||
int ftz = job->ftz;
|
int ftz = job->ftz;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
const char *name = job->f->name;
|
const char *name = job->f->name;
|
||||||
|
|
||||||
@@ -315,7 +313,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_long *out[VECTOR_SIZE_COUNT];
|
cl_long *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_long *)clEnqueueMapBuffer(
|
out[j] = (cl_long *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -333,7 +331,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Write the new values to the input array
|
// Write the new values to the input array
|
||||||
cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
|
cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
p[j] = DoubleFromUInt32(base + j * scale);
|
p[j] = DoubleFromUInt32(base + j * scale);
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
@@ -343,7 +341,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -404,11 +402,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Calculate the correctly rounded reference result
|
// Calculate the correctly rounded reference result
|
||||||
cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
|
cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
|
||||||
cl_double *s = (cl_double *)p;
|
cl_double *s = (cl_double *)p;
|
||||||
for (j = 0; j < buffer_elements; j++) r[j] = dfunc.i_f(s[j]);
|
for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_f(s[j]);
|
||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_long *)clEnqueueMapBuffer(
|
out[j] = (cl_long *)clEnqueueMapBuffer(
|
||||||
@@ -424,7 +422,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
cl_long *t = (cl_long *)r;
|
cl_long *t = (cl_long *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
cl_long *q = out[0];
|
cl_long *q = out[0];
|
||||||
|
|
||||||
@@ -450,7 +448,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
q = out[k];
|
q = out[k];
|
||||||
// If we aren't getting the correctly rounded result
|
// If we aren't getting the correctly rounded result
|
||||||
@@ -476,7 +474,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -150,7 +150,6 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
|
|
||||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||||
|
|
||||||
@@ -178,7 +177,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -201,7 +200,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_float),
|
i * test_info.subBufferSize * sizeof(cl_float),
|
||||||
@@ -218,7 +217,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -269,12 +268,12 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -282,10 +281,10 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -306,7 +305,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
ThreadInfo *tinfo = job->tinfo + thread_id;
|
ThreadInfo *tinfo = job->tinfo + thread_id;
|
||||||
fptr func = job->f->func;
|
fptr func = job->f->func;
|
||||||
int ftz = job->ftz;
|
int ftz = job->ftz;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error = CL_SUCCESS;
|
cl_int error = CL_SUCCESS;
|
||||||
cl_int ret = CL_SUCCESS;
|
cl_int ret = CL_SUCCESS;
|
||||||
const char *name = job->f->name;
|
const char *name = job->f->name;
|
||||||
@@ -319,7 +317,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_int *out[VECTOR_SIZE_COUNT];
|
cl_int *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_int *)clEnqueueMapBuffer(
|
out[j] = (cl_int *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -337,7 +335,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Init input array
|
// Init input array
|
||||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||||
for (j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
|
for (size_t j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
buffer_size, p, 0, NULL, NULL)))
|
buffer_size, p, 0, NULL, NULL)))
|
||||||
@@ -346,7 +344,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -407,11 +405,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Calculate the correctly rounded reference result
|
// Calculate the correctly rounded reference result
|
||||||
cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
|
cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
|
||||||
float *s = (float *)p;
|
float *s = (float *)p;
|
||||||
for (j = 0; j < buffer_elements; j++) r[j] = ref_func(s[j]);
|
for (size_t j = 0; j < buffer_elements; j++) r[j] = ref_func(s[j]);
|
||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_int *)clEnqueueMapBuffer(
|
out[j] = (cl_int *)clEnqueueMapBuffer(
|
||||||
@@ -427,9 +425,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
cl_int *t = (cl_int *)r;
|
cl_int *t = (cl_int *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
cl_int *q = out[0];
|
cl_int *q = out[0];
|
||||||
|
|
||||||
@@ -456,7 +454,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
|
for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
|
||||||
|
k++)
|
||||||
{
|
{
|
||||||
q = out[k];
|
q = out[k];
|
||||||
// If we aren't getting the correctly rounded result
|
// If we aren't getting the correctly rounded result
|
||||||
@@ -486,7 +485,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
ret = error;
|
ret = error;
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -132,8 +132,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
|||||||
|
|
||||||
int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[VECTOR_SIZE_COUNT];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
@@ -155,13 +153,13 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
double *p = (double *)gIn;
|
double *p = (double *)gIn;
|
||||||
double *p2 = (double *)gIn2;
|
double *p2 = (double *)gIn2;
|
||||||
double *p3 = (double *)gIn3;
|
double *p3 = (double *)gIn3;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||||
{
|
{
|
||||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||||
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
||||||
@@ -190,7 +188,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -205,7 +203,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||||
@@ -252,11 +250,11 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
double *s = (double *)gIn;
|
double *s = (double *)gIn;
|
||||||
double *s2 = (double *)gIn2;
|
double *s2 = (double *)gIn2;
|
||||||
double *s3 = (double *)gIn3;
|
double *s3 = (double *)gIn3;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||||
r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
|
r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -293,7 +291,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -130,8 +130,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
|||||||
|
|
||||||
int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||||
@@ -154,13 +152,13 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
cl_uint *p = (cl_uint *)gIn;
|
cl_uint *p = (cl_uint *)gIn;
|
||||||
cl_uint *p2 = (cl_uint *)gIn2;
|
cl_uint *p2 = (cl_uint *)gIn2;
|
||||||
cl_uint *p3 = (cl_uint *)gIn3;
|
cl_uint *p3 = (cl_uint *)gIn3;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
p[j] = genrand_int32(d);
|
p[j] = genrand_int32(d);
|
||||||
p2[j] = genrand_int32(d);
|
p2[j] = genrand_int32(d);
|
||||||
@@ -189,7 +187,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -204,7 +202,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||||
@@ -251,11 +249,11 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
float *s = (float *)gIn;
|
float *s = (float *)gIn;
|
||||||
float *s2 = (float *)gIn2;
|
float *s2 = (float *)gIn2;
|
||||||
float *s3 = (float *)gIn3;
|
float *s3 = (float *)gIn3;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
r[j] = (float)f->func.f_fff(s[j], s2[j], s3[j]);
|
r[j] = (float)f->func.f_fff(s[j], s2[j], s3[j]);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -292,7 +290,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -208,8 +208,6 @@ static const size_t specialValuesCount =
|
|||||||
int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||||
bool relaxedMode)
|
bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[VECTOR_SIZE_COUNT];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
@@ -234,22 +232,23 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
double *p = (double *)gIn;
|
double *p = (double *)gIn;
|
||||||
double *p2 = (double *)gIn2;
|
double *p2 = (double *)gIn2;
|
||||||
double *p3 = (double *)gIn3;
|
double *p3 = (double *)gIn3;
|
||||||
j = 0;
|
size_t idx = 0;
|
||||||
|
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
{ // test edge cases
|
{ // test edge cases
|
||||||
uint32_t x, y, z;
|
uint32_t x, y, z;
|
||||||
x = y = z = 0;
|
x = y = z = 0;
|
||||||
for (; j < BUFFER_SIZE / sizeof(double); j++)
|
for (; idx < BUFFER_SIZE / sizeof(double); idx++)
|
||||||
{
|
{
|
||||||
p[j] = specialValues[x];
|
p[idx] = specialValues[x];
|
||||||
p2[j] = specialValues[y];
|
p2[idx] = specialValues[y];
|
||||||
p3[j] = specialValues[z];
|
p3[idx] = specialValues[z];
|
||||||
if (++x >= specialValuesCount)
|
if (++x >= specialValuesCount)
|
||||||
{
|
{
|
||||||
x = 0;
|
x = 0;
|
||||||
@@ -260,15 +259,15 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (j == BUFFER_SIZE / sizeof(double))
|
if (idx == BUFFER_SIZE / sizeof(double))
|
||||||
vlog_error("Test Error: not all special cases tested!\n");
|
vlog_error("Test Error: not all special cases tested!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; j < BUFFER_SIZE / sizeof(double); j++)
|
for (; idx < BUFFER_SIZE / sizeof(double); idx++)
|
||||||
{
|
{
|
||||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
p[idx] = DoubleFromUInt32(genrand_int32(d));
|
||||||
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
p2[idx] = DoubleFromUInt32(genrand_int32(d));
|
||||||
p3[j] = DoubleFromUInt32(genrand_int32(d));
|
p3[idx] = DoubleFromUInt32(genrand_int32(d));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
@@ -293,7 +292,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -308,7 +307,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||||
@@ -355,11 +354,11 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
|||||||
double *s = (double *)gIn;
|
double *s = (double *)gIn;
|
||||||
double *s2 = (double *)gIn2;
|
double *s2 = (double *)gIn2;
|
||||||
double *s3 = (double *)gIn3;
|
double *s3 = (double *)gIn3;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||||
r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
|
r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -374,9 +373,9 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
uint64_t *t = (uint64_t *)gOut_Ref;
|
uint64_t *t = (uint64_t *)gOut_Ref;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint64_t *q = (uint64_t *)(gOut[k]);
|
uint64_t *q = (uint64_t *)(gOut[k]);
|
||||||
|
|
||||||
@@ -731,7 +730,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -215,8 +215,6 @@ static const size_t specialValuesCount =
|
|||||||
|
|
||||||
int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||||
@@ -250,13 +248,14 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
cl_uint *p = (cl_uint *)gIn;
|
cl_uint *p = (cl_uint *)gIn;
|
||||||
cl_uint *p2 = (cl_uint *)gIn2;
|
cl_uint *p2 = (cl_uint *)gIn2;
|
||||||
cl_uint *p3 = (cl_uint *)gIn3;
|
cl_uint *p3 = (cl_uint *)gIn3;
|
||||||
j = 0;
|
size_t idx = 0;
|
||||||
|
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
{ // test edge cases
|
{ // test edge cases
|
||||||
float *fp = (float *)gIn;
|
float *fp = (float *)gIn;
|
||||||
@@ -264,11 +263,11 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
float *fp3 = (float *)gIn3;
|
float *fp3 = (float *)gIn3;
|
||||||
uint32_t x, y, z;
|
uint32_t x, y, z;
|
||||||
x = y = z = 0;
|
x = y = z = 0;
|
||||||
for (; j < BUFFER_SIZE / sizeof(float); j++)
|
for (; idx < BUFFER_SIZE / sizeof(float); idx++)
|
||||||
{
|
{
|
||||||
fp[j] = specialValues[x];
|
fp[idx] = specialValues[x];
|
||||||
fp2[j] = specialValues[y];
|
fp2[idx] = specialValues[y];
|
||||||
fp3[j] = specialValues[z];
|
fp3[idx] = specialValues[z];
|
||||||
|
|
||||||
if (++x >= specialValuesCount)
|
if (++x >= specialValuesCount)
|
||||||
{
|
{
|
||||||
@@ -280,15 +279,15 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (j == BUFFER_SIZE / sizeof(float))
|
if (idx == BUFFER_SIZE / sizeof(float))
|
||||||
vlog_error("Test Error: not all special cases tested!\n");
|
vlog_error("Test Error: not all special cases tested!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; j < BUFFER_SIZE / sizeof(float); j++)
|
for (; idx < BUFFER_SIZE / sizeof(float); idx++)
|
||||||
{
|
{
|
||||||
p[j] = genrand_int32(d);
|
p[idx] = genrand_int32(d);
|
||||||
p2[j] = genrand_int32(d);
|
p2[idx] = genrand_int32(d);
|
||||||
p3[j] = genrand_int32(d);
|
p3[idx] = genrand_int32(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
@@ -313,7 +312,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -328,7 +327,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||||
@@ -377,7 +376,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
float *s3 = (float *)gIn3;
|
float *s3 = (float *)gIn3;
|
||||||
if (skipNanInf)
|
if (skipNanInf)
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
feclearexcept(FE_OVERFLOW);
|
feclearexcept(FE_OVERFLOW);
|
||||||
r[j] =
|
r[j] =
|
||||||
@@ -388,13 +387,13 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
r[j] =
|
r[j] =
|
||||||
(float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
|
(float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -409,9 +408,9 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||||
|
|
||||||
@@ -866,7 +865,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -159,7 +159,6 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
float maxError = 0.0f;
|
float maxError = 0.0f;
|
||||||
double maxErrorVal = 0.0;
|
double maxErrorVal = 0.0;
|
||||||
|
|
||||||
@@ -189,7 +188,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -212,7 +211,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_double),
|
i * test_info.subBufferSize * sizeof(cl_double),
|
||||||
@@ -229,7 +228,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -269,7 +268,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
// Accumulate the arithmetic errors
|
// Accumulate the arithmetic errors
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
if (test_info.tinfo[i].maxError > maxError)
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
{
|
{
|
||||||
@@ -292,12 +291,12 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -305,10 +304,10 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -329,7 +328,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
ThreadInfo *tinfo = job->tinfo + thread_id;
|
ThreadInfo *tinfo = job->tinfo + thread_id;
|
||||||
float ulps = job->ulps;
|
float ulps = job->ulps;
|
||||||
dptr func = job->f->dfunc;
|
dptr func = job->f->dfunc;
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
int ftz = job->ftz;
|
int ftz = job->ftz;
|
||||||
|
|
||||||
@@ -338,7 +336,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_ulong *out[VECTOR_SIZE_COUNT];
|
cl_ulong *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -356,7 +354,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Write the new values to the input array
|
// Write the new values to the input array
|
||||||
cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
|
cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
p[j] = DoubleFromUInt32(base + j * scale);
|
p[j] = DoubleFromUInt32(base + j * scale);
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
@@ -366,7 +364,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -428,11 +426,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Calculate the correctly rounded reference result
|
// Calculate the correctly rounded reference result
|
||||||
cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||||
cl_double *s = (cl_double *)p;
|
cl_double *s = (cl_double *)p;
|
||||||
for (j = 0; j < buffer_elements; j++) r[j] = (cl_double)func.f_f(s[j]);
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
|
r[j] = (cl_double)func.f_f(s[j]);
|
||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
out[j] = (cl_ulong *)clEnqueueMapBuffer(
|
||||||
@@ -448,9 +447,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
cl_ulong *t = (cl_ulong *)r;
|
cl_ulong *t = (cl_ulong *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
cl_ulong *q = out[k];
|
cl_ulong *q = out[k];
|
||||||
|
|
||||||
@@ -516,7 +515,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -157,7 +157,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
{
|
{
|
||||||
TestInfo test_info;
|
TestInfo test_info;
|
||||||
cl_int error;
|
cl_int error;
|
||||||
size_t i, j;
|
|
||||||
float maxError = 0.0f;
|
float maxError = 0.0f;
|
||||||
double maxErrorVal = 0.0;
|
double maxErrorVal = 0.0;
|
||||||
int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
|
int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
|
||||||
@@ -189,7 +188,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
test_info.relaxedMode = relaxedMode;
|
test_info.relaxedMode = relaxedMode;
|
||||||
// cl_kernels aren't thread safe, so we make one for each vector size for
|
// cl_kernels aren't thread safe, so we make one for each vector size for
|
||||||
// every thread
|
// every thread
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
size_t array_size = test_info.threadCount * sizeof(cl_kernel);
|
||||||
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
test_info.k[i] = (cl_kernel *)malloc(array_size);
|
||||||
@@ -212,7 +211,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
memset(test_info.tinfo, 0,
|
memset(test_info.tinfo, 0,
|
||||||
test_info.threadCount * sizeof(*test_info.tinfo));
|
test_info.threadCount * sizeof(*test_info.tinfo));
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_buffer_region region = {
|
cl_buffer_region region = {
|
||||||
i * test_info.subBufferSize * sizeof(cl_float),
|
i * test_info.subBufferSize * sizeof(cl_float),
|
||||||
@@ -229,7 +228,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
@@ -287,7 +286,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
// Accumulate the arithmetic errors
|
// Accumulate the arithmetic errors
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
if (test_info.tinfo[i].maxError > maxError)
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
{
|
{
|
||||||
@@ -316,12 +315,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||||
{
|
{
|
||||||
clReleaseProgram(test_info.programs[i]);
|
clReleaseProgram(test_info.programs[i]);
|
||||||
if (test_info.k[i])
|
if (test_info.k[i])
|
||||||
{
|
{
|
||||||
for (j = 0; j < test_info.threadCount; j++)
|
for (cl_uint j = 0; j < test_info.threadCount; j++)
|
||||||
clReleaseKernel(test_info.k[i][j]);
|
clReleaseKernel(test_info.k[i][j]);
|
||||||
|
|
||||||
free(test_info.k[i]);
|
free(test_info.k[i]);
|
||||||
@@ -329,10 +328,10 @@ exit:
|
|||||||
}
|
}
|
||||||
if (test_info.tinfo)
|
if (test_info.tinfo)
|
||||||
{
|
{
|
||||||
for (i = 0; i < test_info.threadCount; i++)
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
{
|
{
|
||||||
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
clReleaseMemObject(test_info.tinfo[i].inBuf);
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||||
}
|
}
|
||||||
@@ -360,7 +359,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
func = job->f->rfunc;
|
func = job->f->rfunc;
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_uint j, k;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
|
|
||||||
int isRangeLimited = job->isRangeLimited;
|
int isRangeLimited = job->isRangeLimited;
|
||||||
@@ -370,7 +368,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// start the map of the output arrays
|
// start the map of the output arrays
|
||||||
cl_event e[VECTOR_SIZE_COUNT];
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
cl_uint *out[VECTOR_SIZE_COUNT];
|
cl_uint *out[VECTOR_SIZE_COUNT];
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
@@ -388,7 +386,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Write the new values to the input array
|
// Write the new values to the input array
|
||||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
p[j] = base + j * scale;
|
p[j] = base + j * scale;
|
||||||
if (relaxedMode)
|
if (relaxedMode)
|
||||||
@@ -421,7 +419,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
// Wait for the map to finish
|
// Wait for the map to finish
|
||||||
if ((error = clWaitForEvents(1, e + j)))
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
@@ -482,11 +480,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
// Calculate the correctly rounded reference result
|
// Calculate the correctly rounded reference result
|
||||||
float *r = (float *)gOut_Ref + thread_id * buffer_elements;
|
float *r = (float *)gOut_Ref + thread_id * buffer_elements;
|
||||||
float *s = (float *)p;
|
float *s = (float *)p;
|
||||||
for (j = 0; j < buffer_elements; j++) r[j] = (float)func.f_f(s[j]);
|
for (size_t j = 0; j < buffer_elements; j++) r[j] = (float)func.f_f(s[j]);
|
||||||
|
|
||||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
// for the last buffer. This is an in order queue.
|
// for the last buffer. This is an in order queue.
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
out[j] = (cl_uint *)clEnqueueMapBuffer(
|
||||||
@@ -502,9 +500,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
uint32_t *t = (uint32_t *)r;
|
uint32_t *t = (uint32_t *)r;
|
||||||
for (j = 0; j < buffer_elements; j++)
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint32_t *q = out[k];
|
uint32_t *q = out[k];
|
||||||
|
|
||||||
@@ -695,7 +693,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
out[j], 0, NULL, NULL)))
|
out[j], 0, NULL, NULL)))
|
||||||
|
|||||||
@@ -126,8 +126,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
|||||||
|
|
||||||
int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[VECTOR_SIZE_COUNT];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
@@ -154,18 +152,18 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
double *p = (double *)gIn;
|
double *p = (double *)gIn;
|
||||||
if (gWimpyMode)
|
if (gWimpyMode)
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
|
p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
p[j] = DoubleFromUInt32((uint32_t)i + j);
|
p[j] = DoubleFromUInt32((uint32_t)i + j);
|
||||||
}
|
}
|
||||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
@@ -176,7 +174,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -201,7 +199,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||||
@@ -240,7 +238,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
double *r = (double *)gOut_Ref;
|
double *r = (double *)gOut_Ref;
|
||||||
double *r2 = (double *)gOut_Ref2;
|
double *r2 = (double *)gOut_Ref2;
|
||||||
double *s = (double *)gIn;
|
double *s = (double *)gIn;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
{
|
{
|
||||||
long double dd;
|
long double dd;
|
||||||
r[j] = (double)f->dfunc.f_fpf(s[j], &dd);
|
r[j] = (double)f->dfunc.f_fpf(s[j], &dd);
|
||||||
@@ -248,7 +246,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -271,9 +269,9 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Verify data
|
// Verify data
|
||||||
uint64_t *t = (uint64_t *)gOut_Ref;
|
uint64_t *t = (uint64_t *)gOut_Ref;
|
||||||
uint64_t *t2 = (uint64_t *)gOut_Ref2;
|
uint64_t *t2 = (uint64_t *)gOut_Ref2;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint64_t *q = (uint64_t *)(gOut[k]);
|
uint64_t *q = (uint64_t *)(gOut[k]);
|
||||||
uint64_t *q2 = (uint64_t *)(gOut2[k]);
|
uint64_t *q2 = (uint64_t *)(gOut2[k]);
|
||||||
@@ -438,7 +436,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -124,8 +124,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
|||||||
|
|
||||||
int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
uint32_t l;
|
uint32_t l;
|
||||||
int error;
|
int error;
|
||||||
char const *testing_mode;
|
char const *testing_mode;
|
||||||
@@ -155,13 +153,13 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
uint32_t *p = (uint32_t *)gIn;
|
uint32_t *p = (uint32_t *)gIn;
|
||||||
if (gWimpyMode)
|
if (gWimpyMode)
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
p[j] = (uint32_t)i + j * scale;
|
p[j] = (uint32_t)i + j * scale;
|
||||||
if (relaxedMode && strcmp(f->name, "sincos") == 0)
|
if (relaxedMode && strcmp(f->name, "sincos") == 0)
|
||||||
@@ -173,7 +171,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
p[j] = (uint32_t)i + j;
|
p[j] = (uint32_t)i + j;
|
||||||
if (relaxedMode && strcmp(f->name, "sincos") == 0)
|
if (relaxedMode && strcmp(f->name, "sincos") == 0)
|
||||||
@@ -192,7 +190,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -217,7 +215,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||||
@@ -272,7 +270,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
if (skipNanInf)
|
if (skipNanInf)
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
double dd;
|
double dd;
|
||||||
feclearexcept(FE_OVERFLOW);
|
feclearexcept(FE_OVERFLOW);
|
||||||
@@ -289,7 +287,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
double dd;
|
double dd;
|
||||||
if (relaxedMode)
|
if (relaxedMode)
|
||||||
@@ -304,7 +302,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
if (isFract && ftz) RestoreFPState(&oldMode);
|
if (isFract && ftz) RestoreFPState(&oldMode);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -331,9 +329,9 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Verify data
|
// Verify data
|
||||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||||
uint32_t *t2 = (uint32_t *)gOut_Ref2;
|
uint32_t *t2 = (uint32_t *)gOut_Ref2;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint32_t *q = (uint32_t *)gOut[k];
|
uint32_t *q = (uint32_t *)gOut[k];
|
||||||
uint32_t *q2 = (uint32_t *)gOut2[k];
|
uint32_t *q2 = (uint32_t *)gOut2[k];
|
||||||
@@ -572,7 +570,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -133,8 +133,6 @@ static cl_ulong abs_cl_long(cl_long i)
|
|||||||
|
|
||||||
int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[VECTOR_SIZE_COUNT];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
@@ -162,18 +160,18 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
double *p = (double *)gIn;
|
double *p = (double *)gIn;
|
||||||
if (gWimpyMode)
|
if (gWimpyMode)
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
|
p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
p[j] = DoubleFromUInt32((uint32_t)i + j);
|
p[j] = DoubleFromUInt32((uint32_t)i + j);
|
||||||
}
|
}
|
||||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
@@ -184,7 +182,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -209,7 +207,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||||
@@ -248,11 +246,11 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
double *r = (double *)gOut_Ref;
|
double *r = (double *)gOut_Ref;
|
||||||
int *r2 = (int *)gOut_Ref2;
|
int *r2 = (int *)gOut_Ref2;
|
||||||
double *s = (double *)gIn;
|
double *s = (double *)gIn;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||||
r[j] = (double)f->dfunc.f_fpI(s[j], r2 + j);
|
r[j] = (double)f->dfunc.f_fpI(s[j], r2 + j);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -275,9 +273,9 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Verify data
|
// Verify data
|
||||||
uint64_t *t = (uint64_t *)gOut_Ref;
|
uint64_t *t = (uint64_t *)gOut_Ref;
|
||||||
int32_t *t2 = (int32_t *)gOut_Ref2;
|
int32_t *t2 = (int32_t *)gOut_Ref2;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint64_t *q = (uint64_t *)(gOut[k]);
|
uint64_t *q = (uint64_t *)(gOut[k]);
|
||||||
int32_t *q2 = (int32_t *)(gOut2[k]);
|
int32_t *q2 = (int32_t *)(gOut2[k]);
|
||||||
@@ -409,7 +407,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -131,8 +131,6 @@ static cl_ulong abs_cl_long(cl_long i)
|
|||||||
|
|
||||||
int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[VECTOR_SIZE_COUNT];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
@@ -165,18 +163,18 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
uint32_t *p = (uint32_t *)gIn;
|
uint32_t *p = (uint32_t *)gIn;
|
||||||
if (gWimpyMode)
|
if (gWimpyMode)
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
p[j] = (uint32_t)i + j * scale;
|
p[j] = (uint32_t)i + j * scale;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
p[j] = (uint32_t)i + j;
|
p[j] = (uint32_t)i + j;
|
||||||
}
|
}
|
||||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
@@ -187,7 +185,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -212,7 +210,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||||
@@ -251,11 +249,11 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
float *r = (float *)gOut_Ref;
|
float *r = (float *)gOut_Ref;
|
||||||
int *r2 = (int *)gOut_Ref2;
|
int *r2 = (int *)gOut_Ref2;
|
||||||
float *s = (float *)gIn;
|
float *s = (float *)gIn;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
r[j] = (float)f->func.f_fpI(s[j], r2 + j);
|
r[j] = (float)f->func.f_fpI(s[j], r2 + j);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -278,9 +276,9 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Verify data
|
// Verify data
|
||||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||||
int32_t *t2 = (int32_t *)gOut_Ref2;
|
int32_t *t2 = (int32_t *)gOut_Ref2;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||||
int32_t *q2 = (int32_t *)(gOut2[k]);
|
int32_t *q2 = (int32_t *)(gOut2[k]);
|
||||||
@@ -407,7 +405,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -126,8 +126,6 @@ static cl_ulong random64(MTdata d)
|
|||||||
|
|
||||||
int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[VECTOR_SIZE_COUNT];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
@@ -150,11 +148,12 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
cl_ulong *p = (cl_ulong *)gIn;
|
cl_ulong *p = (cl_ulong *)gIn;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_ulong); j++) p[j] = random64(d);
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_ulong); j++)
|
||||||
|
p[j] = random64(d);
|
||||||
|
|
||||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||||
@@ -164,7 +163,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -179,7 +178,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||||
@@ -211,11 +210,11 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Calculate the correctly rounded reference result
|
// Calculate the correctly rounded reference result
|
||||||
double *r = (double *)gOut_Ref;
|
double *r = (double *)gOut_Ref;
|
||||||
cl_ulong *s = (cl_ulong *)gIn;
|
cl_ulong *s = (cl_ulong *)gIn;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
r[j] = (double)f->dfunc.f_u(s[j]);
|
r[j] = (double)f->dfunc.f_u(s[j]);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -230,9 +229,9 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
uint64_t *t = (uint64_t *)gOut_Ref;
|
uint64_t *t = (uint64_t *)gOut_Ref;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint64_t *q = (uint64_t *)(gOut[k]);
|
uint64_t *q = (uint64_t *)(gOut[k]);
|
||||||
|
|
||||||
@@ -306,7 +305,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
@@ -118,8 +118,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
|||||||
|
|
||||||
int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
||||||
{
|
{
|
||||||
uint64_t i;
|
|
||||||
uint32_t j, k;
|
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[VECTOR_SIZE_COUNT];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
@@ -165,18 +163,18 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (i = 0; i < (1ULL << 32); i += step)
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
// Init input array
|
// Init input array
|
||||||
uint32_t *p = (uint32_t *)gIn;
|
uint32_t *p = (uint32_t *)gIn;
|
||||||
if (gWimpyMode)
|
if (gWimpyMode)
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
p[j] = (uint32_t)i + j * scale;
|
p[j] = (uint32_t)i + j * scale;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
p[j] = (uint32_t)i + j;
|
p[j] = (uint32_t)i + j;
|
||||||
}
|
}
|
||||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
@@ -187,7 +185,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
@@ -202,7 +200,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||||
@@ -234,11 +232,11 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Calculate the correctly rounded reference result
|
// Calculate the correctly rounded reference result
|
||||||
float *r = (float *)gOut_Ref;
|
float *r = (float *)gOut_Ref;
|
||||||
cl_uint *s = (cl_uint *)gIn;
|
cl_uint *s = (cl_uint *)gIn;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
r[j] = (float)f->func.f_u(s[j]);
|
r[j] = (float)f->func.f_u(s[j]);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if ((error =
|
if ((error =
|
||||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
@@ -254,9 +252,9 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||||
|
|
||||||
@@ -339,7 +337,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
|
|
||||||
exit:
|
exit:
|
||||||
// Release
|
// Release
|
||||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
|
|||||||
Reference in New Issue
Block a user