Avoid manual memory management (#1260)

* Avoid manual memory management

Prefer std::vector over malloc and free. This will allow removing goto
statements by leveraging RAII.

Use appropriate type (bool) to store overflow predicates and allocate
std::vector<bool> of appropriate sizes: before this change the
allocation was unnecessarily bigger than required.

No longer attempt to catch "out of host memory" issues, given that in
such a situation it is generally not possible to cleanly report an error.
Rely on std::bad_alloc exception to report such issues.

Introduce a new header for common code in the math_brute_force
component. It is currently complementary to utility.h and is expected to
hold cleaned up content extracted from future refactoring operations.

List all headers as sources in CMake for better compatibility with IDEs.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>

* Remove manual or unnecessary memset

In order to use non-POD types as fields of TestInfo, memset must be
replaced with a compatible zero-initialisation.

Remove an unnecessary memset in MakeKernels.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
Marco Antognini
2021-05-24 16:34:54 +01:00
committed by GitHub
parent a08cacc673
commit ed839ebf10
15 changed files with 366 additions and 581 deletions

View File

@@ -9,7 +9,9 @@ set(${MODULE_NAME}_SOURCES
binary_operator_float.cpp binary_operator_float.cpp
binary_two_results_i_double.cpp binary_two_results_i_double.cpp
binary_two_results_i_float.cpp binary_two_results_i_float.cpp
common.h
function_list.cpp function_list.cpp
function_list.h
i_unary_double.cpp i_unary_double.cpp
i_unary_float.cpp i_unary_float.cpp
macro_binary_double.cpp macro_binary_double.cpp
@@ -20,9 +22,12 @@ set(${MODULE_NAME}_SOURCES
mad_float.cpp mad_float.cpp
main.cpp main.cpp
reference_math.cpp reference_math.cpp
reference_math.h
sleep.cpp sleep.cpp
sleep.h
ternary_double.cpp ternary_double.cpp
ternary_float.cpp ternary_float.cpp
test_functions.h
unary_double.cpp unary_double.cpp
unary_float.cpp unary_float.cpp
unary_two_results_double.cpp unary_two_results_double.cpp
@@ -32,6 +37,7 @@ set(${MODULE_NAME}_SOURCES
unary_u_double.cpp unary_u_double.cpp
unary_u_float.cpp unary_u_float.cpp
utility.cpp utility.cpp
utility.h
) )
include(../CMakeCommon.txt) include(../CMakeCommon.txt)

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -115,7 +116,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *nameInCode; const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -126,7 +127,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count, return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -149,11 +151,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -284,11 +289,11 @@ constexpr size_t specialValuesCount =
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double); size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
float ulps = job->ulps; float ulps = job->ulps;
dptr func = job->f->dfunc; dptr func = job->f->dfunc;
int ftz = job->ftz; int ftz = job->ftz;
@@ -647,7 +652,7 @@ exit:
int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
float maxError = 0.0f; float maxError = 0.0f;
double maxErrorVal = 0.0; double maxErrorVal = 0.0;
@@ -656,7 +661,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -685,27 +689,10 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -802,27 +789,20 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
free_mtdata(test_info.tinfo[i].d);
clReleaseMemObject(test_info.tinfo[i].inBuf);
clReleaseMemObject(test_info.tinfo[i].inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
free_mtdata(threadInfo.d);
clReleaseMemObject(threadInfo.inBuf);
clReleaseMemObject(threadInfo.inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -113,7 +114,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *nameInCode; const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -124,7 +125,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count, return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -147,11 +149,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -274,18 +279,18 @@ constexpr size_t specialValuesCount =
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float); size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func; fptr func = job->f->func;
int ftz = job->ftz; int ftz = job->ftz;
bool relaxedMode = job->relaxedMode; bool relaxedMode = job->relaxedMode;
float ulps = getAllowedUlpError(job->f, relaxedMode); float ulps = getAllowedUlpError(job->f, relaxedMode);
MTdata d = tinfo->d; MTdata d = tinfo->d;
cl_int error; cl_int error;
cl_uchar *overflow = (cl_uchar *)malloc(buffer_size); std::vector<bool> overflow(buffer_elements, false);
const char *name = job->f->name; const char *name = job->f->name;
int isFDim = job->isFDim; int isFDim = job->isFDim;
int skipNanInf = job->skipNanInf; int skipNanInf = job->skipNanInf;
@@ -447,7 +452,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
vlog_error("Error: clFinish failed! err: %d\n", error); vlog_error("Error: clFinish failed! err: %d\n", error);
goto exit; goto exit;
} }
free(overflow);
return CL_SUCCESS; return CL_SUCCESS;
} }
@@ -799,7 +803,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
} }
exit: exit:
if (overflow) free(overflow);
return error; return error;
} }
@@ -807,7 +810,7 @@ exit:
int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
float maxError = 0.0f; float maxError = 0.0f;
double maxErrorVal = 0.0; double maxErrorVal = 0.0;
@@ -816,7 +819,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -846,27 +848,10 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -963,27 +948,20 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
free_mtdata(test_info.tinfo[i].d);
clReleaseMemObject(test_info.tinfo[i].inBuf);
clReleaseMemObject(test_info.tinfo[i].inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
free_mtdata(threadInfo.d);
clReleaseMemObject(threadInfo.inBuf);
clReleaseMemObject(threadInfo.inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -114,7 +115,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *nameInCode; const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -125,7 +126,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count, return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -148,11 +150,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -287,11 +292,11 @@ constexpr size_t specialValuesIntCount =
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double); size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
float ulps = job->ulps; float ulps = job->ulps;
dptr func = job->f->dfunc; dptr func = job->f->dfunc;
int ftz = job->ftz; int ftz = job->ftz;
@@ -568,7 +573,7 @@ exit:
int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
float maxError = 0.0f; float maxError = 0.0f;
double maxErrorVal = 0.0; double maxErrorVal = 0.0;
@@ -577,7 +582,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -602,27 +606,10 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -722,27 +709,20 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
free_mtdata(test_info.tinfo[i].d);
clReleaseMemObject(test_info.tinfo[i].inBuf);
clReleaseMemObject(test_info.tinfo[i].inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
free_mtdata(threadInfo.d);
clReleaseMemObject(threadInfo.inBuf);
clReleaseMemObject(threadInfo.inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -112,7 +113,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *nameInCode; const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -123,7 +124,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count, return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -146,11 +148,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -279,11 +284,11 @@ constexpr size_t specialValuesIntCount =
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float); size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func; fptr func = job->f->func;
int ftz = job->ftz; int ftz = job->ftz;
float ulps = job->ulps; float ulps = job->ulps;
@@ -561,7 +566,7 @@ exit:
int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
float maxError = 0.0f; float maxError = 0.0f;
double maxErrorVal = 0.0; double maxErrorVal = 0.0;
@@ -570,7 +575,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -596,27 +600,10 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -716,27 +703,20 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
free_mtdata(test_info.tinfo[i].d);
clReleaseMemObject(test_info.tinfo[i].inBuf);
clReleaseMemObject(test_info.tinfo[i].inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
free_mtdata(threadInfo.d);
clReleaseMemObject(threadInfo.inBuf);
clReleaseMemObject(threadInfo.inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -114,7 +115,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *operator_symbol; const char *operator_symbol;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -125,7 +126,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->operator_symbol, i, info->kernel_count, return BuildKernel(info->operator_symbol, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -148,11 +150,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -281,11 +286,11 @@ constexpr size_t specialValuesCount =
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double); size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
float ulps = job->ulps; float ulps = job->ulps;
dptr func = job->f->dfunc; dptr func = job->f->dfunc;
int ftz = job->ftz; int ftz = job->ftz;
@@ -619,7 +624,7 @@ exit:
int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
bool relaxedMode) bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
float maxError = 0.0f; float maxError = 0.0f;
double maxErrorVal = 0.0; double maxErrorVal = 0.0;
@@ -628,7 +633,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -653,27 +657,10 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -770,27 +757,20 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
free_mtdata(test_info.tinfo[i].d);
clReleaseMemObject(test_info.tinfo[i].inBuf);
clReleaseMemObject(test_info.tinfo[i].inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
free_mtdata(threadInfo.d);
clReleaseMemObject(threadInfo.inBuf);
clReleaseMemObject(threadInfo.inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -112,7 +113,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *operator_symbol; const char *operator_symbol;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -123,7 +124,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->operator_symbol, i, info->kernel_count, return BuildKernel(info->operator_symbol, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -146,11 +148,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -271,18 +276,18 @@ constexpr size_t specialValuesCount =
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float); size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func; fptr func = job->f->func;
int ftz = job->ftz; int ftz = job->ftz;
bool relaxedMode = job->relaxedMode; bool relaxedMode = job->relaxedMode;
float ulps = getAllowedUlpError(job->f, relaxedMode); float ulps = getAllowedUlpError(job->f, relaxedMode);
MTdata d = tinfo->d; MTdata d = tinfo->d;
cl_int error; cl_int error;
cl_uchar *overflow = (cl_uchar *)malloc(buffer_size); std::vector<bool> overflow(buffer_elements, false);
const char *name = job->f->name; const char *name = job->f->name;
cl_uint *t = 0; cl_uint *t = 0;
cl_float *r = 0; cl_float *r = 0;
@@ -445,7 +450,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (gSkipCorrectnessTesting) if (gSkipCorrectnessTesting)
{ {
free(overflow);
return CL_SUCCESS; return CL_SUCCESS;
} }
@@ -738,7 +742,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
} }
exit: exit:
if (overflow) free(overflow);
return error; return error;
} }
@@ -747,7 +750,7 @@ exit:
int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
bool relaxedMode) bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
float maxError = 0.0f; float maxError = 0.0f;
double maxErrorVal = 0.0; double maxErrorVal = 0.0;
@@ -756,7 +759,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -783,27 +785,10 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -900,27 +885,20 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
free_mtdata(test_info.tinfo[i].d);
clReleaseMemObject(test_info.tinfo[i].inBuf);
clReleaseMemObject(test_info.tinfo[i].inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
free_mtdata(threadInfo.d);
clReleaseMemObject(threadInfo.inBuf);
clReleaseMemObject(threadInfo.inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -0,0 +1,27 @@
//
// Copyright (c) 2021 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef COMMON_H
#define COMMON_H
#include "utility.h"
#include <array>
#include <vector>
// Table of kernels organised per vector size: a fixed-size std::array with
// VECTOR_SIZE_COUNT entries, each entry being a std::vector<cl_kernel>.
// Tests in this component index it as k[vector_size][thread_id] and resize
// the inner vectors they use to the worker thread count, so that an inner
// vector holds one kernel per thread. Inner vectors start out empty.
// NOTE(review): cl_kernel and VECTOR_SIZE_COUNT are presumably made visible
// through "utility.h" - confirm.
using KernelMatrix = std::array<std::vector<cl_kernel>, VECTOR_SIZE_COUNT>;
#endif /* COMMON_H */

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -113,7 +114,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *nameInCode; const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -124,7 +125,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count, return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -142,11 +144,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -270,11 +275,11 @@ constexpr size_t specialValuesCount =
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double); size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
dptr dfunc = job->f->dfunc; dptr dfunc = job->f->dfunc;
int ftz = job->ftz; int ftz = job->ftz;
MTdata d = tinfo->d; MTdata d = tinfo->d;
@@ -577,13 +582,12 @@ exit:
int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -607,28 +611,11 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo) for (cl_uint i = 0; i < test_info.threadCount; i++)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (size_t i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
i * test_info.subBufferSize * sizeof(cl_double), i * test_info.subBufferSize * sizeof(cl_double),
@@ -711,27 +698,20 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
free_mtdata(test_info.tinfo[i].d);
clReleaseMemObject(test_info.tinfo[i].inBuf);
clReleaseMemObject(test_info.tinfo[i].inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
free_mtdata(threadInfo.d);
clReleaseMemObject(threadInfo.inBuf);
clReleaseMemObject(threadInfo.inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -111,7 +112,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *nameInCode; const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -122,7 +123,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count, return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -140,11 +142,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -260,11 +265,11 @@ constexpr size_t specialValuesCount =
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float); size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func; fptr func = job->f->func;
int ftz = job->ftz; int ftz = job->ftz;
MTdata d = tinfo->d; MTdata d = tinfo->d;
@@ -565,13 +570,12 @@ exit:
int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -596,27 +600,10 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -700,27 +687,20 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
free_mtdata(test_info.tinfo[i].d);
clReleaseMemObject(test_info.tinfo[i].inBuf);
clReleaseMemObject(test_info.tinfo[i].inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
free_mtdata(threadInfo.d);
clReleaseMemObject(threadInfo.inBuf);
clReleaseMemObject(threadInfo.inBuf2);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -107,7 +108,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *nameInCode; const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -118,7 +119,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count, return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -134,11 +136,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -148,12 +153,12 @@ struct TestInfo
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double); size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint scale = job->scale; cl_uint scale = job->scale;
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
dptr dfunc = job->f->dfunc; dptr dfunc = job->f->dfunc;
int ftz = job->ftz; int ftz = job->ftz;
cl_int error; cl_int error;
@@ -362,13 +367,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -392,27 +396,10 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -484,25 +471,18 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
clReleaseMemObject(test_info.tinfo[i].inBuf);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
clReleaseMemObject(threadInfo.inBuf);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -106,7 +107,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *nameInCode; const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -117,7 +118,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count, return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -133,11 +135,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -147,12 +152,12 @@ struct TestInfo
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float); size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint scale = job->scale; cl_uint scale = job->scale;
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func; fptr func = job->f->func;
int ftz = job->ftz; int ftz = job->ftz;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
@@ -376,13 +381,12 @@ exit:
int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -407,27 +411,10 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -499,25 +486,18 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
clReleaseMemObject(test_info.tinfo[i].inBuf);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
clReleaseMemObject(threadInfo.inBuf);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -1055,8 +1055,6 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
cl_uint kernel_count, cl_kernel *k, cl_program *p, cl_uint kernel_count, cl_kernel *k, cl_program *p,
bool relaxedMode) bool relaxedMode)
{ {
int error = 0;
cl_uint i;
char options[200] = ""; char options[200] = "";
if (gForceFTZ) if (gForceFTZ)
@@ -1074,7 +1072,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
strcat(options, " -cl-fast-relaxed-math"); strcat(options, " -cl-fast-relaxed-math");
} }
error = int error =
create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options); create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
if (error != CL_SUCCESS) if (error != CL_SUCCESS)
{ {
@@ -1082,9 +1080,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
return error; return error;
} }
for (cl_uint i = 0; i < kernel_count; i++)
memset(k, 0, kernel_count * sizeof(*k));
for (i = 0; i < kernel_count; i++)
{ {
k[i] = clCreateKernel(*p, name, &error); k[i] = clCreateKernel(*p, name, &error);
if (NULL == k[i] || error) if (NULL == k[i] || error)

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -107,7 +108,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *nameInCode; const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -118,7 +119,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count, return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -136,11 +138,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -157,12 +162,12 @@ struct TestInfo
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double); size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint scale = job->scale; cl_uint scale = job->scale;
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
float ulps = job->ulps; float ulps = job->ulps;
dptr func = job->f->dfunc; dptr func = job->f->dfunc;
cl_int error; cl_int error;
@@ -389,14 +394,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
float maxError = 0.0f; float maxError = 0.0f;
double maxErrorVal = 0.0; double maxErrorVal = 0.0;
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -422,27 +426,10 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -526,25 +513,18 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
clReleaseMemObject(test_info.tinfo[i].inBuf);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
clReleaseMemObject(threadInfo.inBuf);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "common.h"
#include "function_list.h" #include "function_list.h"
#include "test_functions.h" #include "test_functions.h"
#include "utility.h" #include "utility.h"
@@ -105,7 +106,7 @@ struct BuildKernelInfo
{ {
cl_uint offset; // the first vector size to build cl_uint offset; // the first vector size to build
cl_uint kernel_count; cl_uint kernel_count;
cl_kernel **kernels; KernelMatrix &kernels;
cl_program *programs; cl_program *programs;
const char *nameInCode; const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -116,7 +117,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
BuildKernelInfo *info = (BuildKernelInfo *)p; BuildKernelInfo *info = (BuildKernelInfo *)p;
cl_uint i = info->offset + job_id; cl_uint i = info->offset + job_id;
return BuildKernel(info->nameInCode, i, info->kernel_count, return BuildKernel(info->nameInCode, i, info->kernel_count,
info->kernels[i], info->programs + i, info->relaxedMode); info->kernels[i].data(), info->programs + i,
info->relaxedMode);
} }
// Thread specific data for a worker thread // Thread specific data for a worker thread
@@ -134,11 +136,14 @@ struct TestInfo
size_t subBufferSize; // Size of the sub-buffer in elements size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info const Func *f; // A pointer to the function info
cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
cl_kernel
*k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each // Thread-specific kernels for each vector size:
// worker thread: k[vector_size][thread_id] // k[vector_size][thread_id]
ThreadInfo * KernelMatrix k;
tinfo; // An array of thread specific information for each worker thread
// Array of thread specific information
std::vector<ThreadInfo> tinfo;
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
@@ -155,12 +160,12 @@ struct TestInfo
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{ {
const TestInfo *job = (const TestInfo *)data; TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize; size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float); size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint scale = job->scale; cl_uint scale = job->scale;
cl_uint base = job_id * (cl_uint)job->step; cl_uint base = job_id * (cl_uint)job->step;
ThreadInfo *tinfo = job->tinfo + thread_id; ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func; fptr func = job->f->func;
const char *fname = job->f->name; const char *fname = job->f->name;
bool relaxedMode = job->relaxedMode; bool relaxedMode = job->relaxedMode;
@@ -541,7 +546,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{ {
TestInfo test_info; TestInfo test_info{};
cl_int error; cl_int error;
float maxError = 0.0f; float maxError = 0.0f;
double maxErrorVal = 0.0; double maxErrorVal = 0.0;
@@ -550,7 +555,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
// Init test_info // Init test_info
memset(&test_info, 0, sizeof(test_info));
test_info.threadCount = GetThreadCount(); test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE test_info.subBufferSize = BUFFER_SIZE
/ (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -576,27 +580,10 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
// every thread // every thread
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
size_t array_size = test_info.threadCount * sizeof(cl_kernel); test_info.k[i].resize(test_info.threadCount, nullptr);
test_info.k[i] = (cl_kernel *)malloc(array_size);
if (NULL == test_info.k[i])
{
vlog_error("Error: Unable to allocate storage for kernels!\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.k[i], 0, array_size);
} }
test_info.tinfo =
(ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
if (NULL == test_info.tinfo)
{
vlog_error(
"Error: Unable to allocate storage for thread specific data.\n");
error = CL_OUT_OF_HOST_MEMORY;
goto exit;
}
memset(test_info.tinfo, 0,
test_info.threadCount * sizeof(*test_info.tinfo));
for (cl_uint i = 0; i < test_info.threadCount; i++) for (cl_uint i = 0; i < test_info.threadCount; i++)
{ {
cl_buffer_region region = { cl_buffer_region region = {
@@ -704,25 +691,18 @@ exit:
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
{ {
clReleaseProgram(test_info.programs[i]); clReleaseProgram(test_info.programs[i]);
if (test_info.k[i]) for (auto &kernel : test_info.k[i])
{ {
for (cl_uint j = 0; j < test_info.threadCount; j++) clReleaseKernel(kernel);
clReleaseKernel(test_info.k[i][j]);
free(test_info.k[i]);
} }
} }
if (test_info.tinfo)
{
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
clReleaseMemObject(test_info.tinfo[i].inBuf);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
}
free(test_info.tinfo); for (auto &threadInfo : test_info.tinfo)
{
clReleaseMemObject(threadInfo.inBuf);
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
clReleaseMemObject(threadInfo.outBuf[j]);
clReleaseCommandQueue(threadInfo.tQueue);
} }
return error; return error;