Change Behaviour of non-uniform-work-group tests for OpenCL-3.0 (#877)

* Pass `-cl-std=CL3.0` as build option for OpenCL-3.0.
* For an OpenCL-3.0 driver that does not support the optional non-uniform
work-groups feature, round up the global size and run the tests with uniform
work-groups. This increases coverage and allows testing of the
`get_enqueued_local_size` builtin for uniform work-groups.
This commit is contained in:
Jack Frankland
2020-09-01 17:16:17 +02:00
committed by GitHub
parent f06e1896a8
commit d1a63f8041
2 changed files with 173 additions and 104 deletions

View File

@@ -243,75 +243,134 @@ static const char *KERNEL_FUNCTION =
NL "}"
NL ;
TestNonUniformWorkGroup::TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context,
const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize, const size_t *localSize, const size_t *buffersSize,
const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize)
: _device(device), _context(context), _queue(queue), _dims (dims) {
TestNonUniformWorkGroup::TestNonUniformWorkGroup(
const cl_device_id &device, const cl_context &context,
const cl_command_queue &queue, const cl_uint dims, size_t *globalSize,
const size_t *localSize, const size_t *buffersSize,
const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize)
: _device(device), _context(context), _queue(queue), _dims(dims)
{
if (globalSize == NULL || dims < 1 || dims > 3) {
//throw std::invalid_argument("globalSize is NULL value.");
// This is method of informing that parameters are wrong.
// It would be checked by prepareDevice() function.
// This is used because of lack of exception support.
_globalSize[0] = 0;
return;
}
cl_uint i;
_globalWorkOffset_IsNull = true;
_localSize_IsNull = true;
setGlobalWorkgroupSize(globalSize);
setLocalWorkgroupSize(globalSize,localSize);
for (i = _dims; i < MAX_DIMS; i++) {
_globalSize[i] = 1;
}
for (i = 0; i < MAX_DIMS; i++) {
_globalWorkOffset[i] = 0;
}
if (globalWorkOffset) {
_globalWorkOffset_IsNull = false;
for (i = 0; i < _dims; i++) {
_globalWorkOffset[i] = globalWorkOffset[i];
if (globalSize == NULL || dims < 1 || dims > 3)
{
// throw std::invalid_argument("globalSize is NULL value.");
// This is method of informing that parameters are wrong.
// It would be checked by prepareDevice() function.
// This is used because of lack of exception support.
_globalSize[0] = 0;
return;
}
}
for (i = 0; i < MAX_DIMS; i++) {
_enqueuedLocalSize[i] = 1;
}
// For OpenCL-3.0, support for non-uniform workgroups is optional. It's still
// useful to run these tests since we can verify the behavior of the
// get_enqueued_local_size() builtin for uniform workgroups, so we round up
// the global size to ensure uniform workgroups on those 3.0 devices.
// We only need to do this when localSize is non-null, otherwise the driver
// will select a value for localSize which will be uniform on devices that
// don't support non-uniform work-groups.
if (nullptr != localSize && get_device_cl_version(device) >= Version(3, 0))
{
// Query for the non-uniform work-group support.
cl_bool are_non_uniform_sub_groups_supported{ CL_FALSE };
auto error =
clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
sizeof(are_non_uniform_sub_groups_supported),
&are_non_uniform_sub_groups_supported, nullptr);
if (error)
{
print_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT");
// This signals an error to the caller (see above).
_globalSize[0] = 0;
return;
}
if (localSize) {
_localSize_IsNull = false;
for (i = 0; i < _dims; i++) {
_enqueuedLocalSize[i] = _localSize[i];
// If non-uniform work-groups are not supported round up the global
// sizes so workgroups are uniform and we have at least one.
if (CL_FALSE == are_non_uniform_sub_groups_supported)
{
log_info(
"WARNING: Non-uniform work-groups are not supported on this "
"device.\n Running test with uniform work-groups.\n");
for (unsigned dim = 0; dim < dims; ++dim)
{
auto global_size_before = globalSize[dim];
auto global_size_rounded = global_size_before
+ (localSize[dim] - global_size_before % localSize[dim]);
globalSize[dim] = global_size_rounded;
log_info("Rounding globalSize[%d] = %d -> %d\n", dim,
global_size_before, global_size_rounded);
}
}
}
}
if (reqdWorkGroupSize) {
for (i = 0; i < _dims; i++) {
_reqdWorkGroupSize[i] = reqdWorkGroupSize[i];
cl_uint i;
_globalWorkOffset_IsNull = true;
_localSize_IsNull = true;
setGlobalWorkgroupSize(globalSize);
setLocalWorkgroupSize(globalSize, localSize);
for (i = _dims; i < MAX_DIMS; i++)
{
_globalSize[i] = 1;
}
for (i = _dims; i < MAX_DIMS; i++) {
_reqdWorkGroupSize[i] = 1;
for (i = 0; i < MAX_DIMS; i++)
{
_globalWorkOffset[i] = 0;
}
} else {
_reqdWorkGroupSize[0] = 0;
_reqdWorkGroupSize[1] = 0;
_reqdWorkGroupSize[2] = 0;
}
_testRange = Range::ALL;
if (globalWorkOffset)
{
_globalWorkOffset_IsNull = false;
for (i = 0; i < _dims; i++)
{
_globalWorkOffset[i] = globalWorkOffset[i];
}
}
_numOfGlobalWorkItems = _globalSize[0]*_globalSize[1]*_globalSize[2];
for (i = 0; i < MAX_DIMS; i++)
{
_enqueuedLocalSize[i] = 1;
}
DataContainerAttrib temp = {{0, 0, 0}};
if (localSize)
{
_localSize_IsNull = false;
for (i = 0; i < _dims; i++)
{
_enqueuedLocalSize[i] = _localSize[i];
}
}
// array with results from each region
_resultsRegionArray.resize(NUMBER_OF_REGIONS, temp);
_referenceRegionArray.resize(NUMBER_OF_REGIONS, temp);
if (reqdWorkGroupSize)
{
for (i = 0; i < _dims; i++)
{
_reqdWorkGroupSize[i] = reqdWorkGroupSize[i];
}
for (i = _dims; i < MAX_DIMS; i++)
{
_reqdWorkGroupSize[i] = 1;
}
}
else
{
_reqdWorkGroupSize[0] = 0;
_reqdWorkGroupSize[1] = 0;
_reqdWorkGroupSize[2] = 0;
}
_testRange = Range::ALL;
_numOfGlobalWorkItems = _globalSize[0] * _globalSize[1] * _globalSize[2];
DataContainerAttrib temp = { { 0, 0, 0 } };
// array with results from each region
_resultsRegionArray.resize(NUMBER_OF_REGIONS, temp);
_referenceRegionArray.resize(NUMBER_OF_REGIONS, temp);
}
TestNonUniformWorkGroup::~TestNonUniformWorkGroup () {
@@ -482,7 +541,7 @@ int TestNonUniformWorkGroup::prepareDevice () {
if(_localSize_IsNull == false)
calculateExpectedValues();
std::string buildOptions = BUILD_CL_STD_2_0;
std::string buildOptions{};
if(_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) {
std::ostringstream tmp(" ");
tmp << " -D RWGSX=" << _reqdWorkGroupSize[0]
@@ -721,42 +780,50 @@ int TestNonUniformWorkGroup::runKernel () {
return 0;
}
void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize,
const size_t *localSize, int range) {
runTestNonUniformWorkGroup (dims, globalSize, localSize, NULL, NULL, range);
void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims,
size_t *globalSize,
const size_t *localSize,
int range)
{
runTestNonUniformWorkGroup(dims, globalSize, localSize, NULL, NULL, range);
}
void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize,
const size_t *localSize, const size_t *globalWorkOffset,
const size_t *reqdWorkGroupSize, int range) {
void SubTestExecutor::runTestNonUniformWorkGroup(
const cl_uint dims, size_t *globalSize, const size_t *localSize,
const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize, int range)
{
int err;
++_overallCounter;
TestNonUniformWorkGroup test (_device, _context, _queue, dims, globalSize, localSize,
NULL, globalWorkOffset, reqdWorkGroupSize);
int err;
++_overallCounter;
TestNonUniformWorkGroup test(_device, _context, _queue, dims, globalSize,
localSize, NULL, globalWorkOffset,
reqdWorkGroupSize);
test.setTestRange(range);
err = test.prepareDevice();
if (err) {
log_error ("Error: prepare device\n");
++_failCounter;
return;
}
test.setTestRange(range);
err = test.prepareDevice();
if (err)
{
log_error("Error: prepare device\n");
++_failCounter;
return;
}
err = test.runKernel();
if (err) {
log_error ("Error: run kernel\n");
++_failCounter;
return;
}
err = test.runKernel();
if (err)
{
log_error("Error: run kernel\n");
++_failCounter;
return;
}
err = test.verifyResults();
if (err) {
log_error ("Error: verify results\n");
++_failCounter;
return;
}
err = test.verifyResults();
if (err)
{
log_error("Error: verify results\n");
++_failCounter;
return;
}
}
int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
@@ -764,7 +831,7 @@ int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
clProgramWrapper program;
clKernelWrapper testKernel;
std::string buildOptions = BUILD_CL_STD_2_0;
std::string buildOptions{};
if (testRange & Range::BASIC)
buildOptions += " -D TESTBASIC";

View File

@@ -25,8 +25,6 @@
#define NUMBER_OF_REGIONS 8
#define BUILD_CL_STD_2_0 "-cl-std=CL2.0"
#define MAX_DIMS 3
// This structure reflects data received from kernel.
@@ -62,18 +60,21 @@ std::string showArray (const size_t *arr, cl_uint dims);
// Main class responsible for testing
class TestNonUniformWorkGroup {
public:
TestNonUniformWorkGroup(const cl_device_id &device,
const cl_context &context,
const cl_command_queue &queue, const cl_uint dims,
size_t *globalSize, const size_t *localSize,
const size_t *buffersSize,
const size_t *globalWorkOffset,
const size_t *reqdWorkGroupSize = NULL);
TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context,
const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize,
const size_t *localSize, const size_t *buffersSize, const size_t *globalWorkOffset,
const size_t *reqdWorkGroupSize=NULL);
~TestNonUniformWorkGroup();
~TestNonUniformWorkGroup ();
static size_t getMaxLocalWorkgroupSize (const cl_device_id &device);
static void setMaxLocalWorkgroupSize (size_t workGroupSize) {
TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize;
}
static size_t getMaxLocalWorkgroupSize(const cl_device_id &device);
static void setMaxLocalWorkgroupSize(size_t workGroupSize)
{
TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize;
}
static void enableStrictMode (bool state);
void setTestRange (int range) {_testRange = range;}
@@ -126,12 +127,13 @@ public:
SubTestExecutor(const cl_device_id &device, const cl_context &context, const cl_command_queue &queue)
: _device (device), _context (context), _queue (queue), _failCounter (0), _overallCounter (0) {}
void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize,
const size_t *localSize, int range);
void runTestNonUniformWorkGroup(const cl_uint dims, size_t *globalSize,
const size_t *localSize, int range);
void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize,
const size_t *localSize, const size_t *globalWorkOffset,
const size_t *reqdWorkGroupSize, int range);
void runTestNonUniformWorkGroup(const cl_uint dims, size_t *globalSize,
const size_t *localSize,
const size_t *globalWorkOffset,
const size_t *reqdWorkGroupSize, int range);
int calculateWorkGroupSize(size_t &maxWgSize, int testRange);
int status();