Change Behaviour of non-uniform-work-group tests for OpenCL-3.0 (#877)

* Pass `-cl-std=CL3.0` as build option for OpenCL-3.0.
* For an OpenCL-3.0 driver that does not support the optional non-uniform
work-groups feature, round up the global size and run the tests with uniform
work-groups. This increases coverage and allows testing of the
`get_enqueued_local_size` builtin for uniform work-groups.
This commit is contained in:
Jack Frankland
2020-09-01 17:16:17 +02:00
committed by GitHub
parent f06e1896a8
commit d1a63f8041
2 changed files with 173 additions and 104 deletions

View File

@@ -243,75 +243,134 @@ static const char *KERNEL_FUNCTION =
NL "}" NL "}"
NL ; NL ;
TestNonUniformWorkGroup::TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context, TestNonUniformWorkGroup::TestNonUniformWorkGroup(
const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize, const size_t *localSize, const size_t *buffersSize, const cl_device_id &device, const cl_context &context,
const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize) const cl_command_queue &queue, const cl_uint dims, size_t *globalSize,
: _device(device), _context(context), _queue(queue), _dims (dims) { const size_t *localSize, const size_t *buffersSize,
const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize)
: _device(device), _context(context), _queue(queue), _dims(dims)
{
if (globalSize == NULL || dims < 1 || dims > 3) { if (globalSize == NULL || dims < 1 || dims > 3)
//throw std::invalid_argument("globalSize is NULL value."); {
// This is method of informing that parameters are wrong. // throw std::invalid_argument("globalSize is NULL value.");
// It would be checked by prepareDevice() function. // This is method of informing that parameters are wrong.
// This is used because of lack of exception support. // It would be checked by prepareDevice() function.
_globalSize[0] = 0; // This is used because of lack of exception support.
return; _globalSize[0] = 0;
} return;
cl_uint i;
_globalWorkOffset_IsNull = true;
_localSize_IsNull = true;
setGlobalWorkgroupSize(globalSize);
setLocalWorkgroupSize(globalSize,localSize);
for (i = _dims; i < MAX_DIMS; i++) {
_globalSize[i] = 1;
}
for (i = 0; i < MAX_DIMS; i++) {
_globalWorkOffset[i] = 0;
}
if (globalWorkOffset) {
_globalWorkOffset_IsNull = false;
for (i = 0; i < _dims; i++) {
_globalWorkOffset[i] = globalWorkOffset[i];
} }
}
for (i = 0; i < MAX_DIMS; i++) { // For OpenCL-3.0 support for non-uniform workgroups is optional, it's still
_enqueuedLocalSize[i] = 1; // useful to run these tests since we can verify the behavior of the
} // get_enqueued_local_size() builtin for uniform workgroups, so we round up
// the global size to ensure uniform workgroups on those 3.0 devices.
// We only need to do this when localSize is non-null, otherwise the driver
// will select a value for localSize which will be uniform on devices that
// don't support non-uniform work-groups.
if (nullptr != localSize && get_device_cl_version(device) >= Version(3, 0))
{
// Query for the non-uniform work-group support.
cl_bool are_non_uniform_sub_groups_supported{ CL_FALSE };
auto error =
clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
sizeof(are_non_uniform_sub_groups_supported),
&are_non_uniform_sub_groups_supported, nullptr);
if (error)
{
print_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT");
// This signals an error to the caller (see above).
_globalSize[0] = 0;
return;
}
if (localSize) { // If non-uniform work-groups are not supported round up the global
_localSize_IsNull = false; // sizes so workgroups are uniform and we have at least one.
for (i = 0; i < _dims; i++) { if (CL_FALSE == are_non_uniform_sub_groups_supported)
_enqueuedLocalSize[i] = _localSize[i]; {
log_info(
"WARNING: Non-uniform work-groups are not supported on this "
"device.\n Running test with uniform work-groups.\n");
for (unsigned dim = 0; dim < dims; ++dim)
{
auto global_size_before = globalSize[dim];
auto global_size_rounded = global_size_before
+ (localSize[dim] - global_size_before % localSize[dim]);
globalSize[dim] = global_size_rounded;
log_info("Rounding globalSize[%d] = %d -> %d\n", dim,
global_size_before, global_size_rounded);
}
}
} }
}
if (reqdWorkGroupSize) { cl_uint i;
for (i = 0; i < _dims; i++) { _globalWorkOffset_IsNull = true;
_reqdWorkGroupSize[i] = reqdWorkGroupSize[i]; _localSize_IsNull = true;
setGlobalWorkgroupSize(globalSize);
setLocalWorkgroupSize(globalSize, localSize);
for (i = _dims; i < MAX_DIMS; i++)
{
_globalSize[i] = 1;
} }
for (i = _dims; i < MAX_DIMS; i++) {
_reqdWorkGroupSize[i] = 1; for (i = 0; i < MAX_DIMS; i++)
{
_globalWorkOffset[i] = 0;
} }
} else {
_reqdWorkGroupSize[0] = 0;
_reqdWorkGroupSize[1] = 0;
_reqdWorkGroupSize[2] = 0;
}
_testRange = Range::ALL; if (globalWorkOffset)
{
_globalWorkOffset_IsNull = false;
for (i = 0; i < _dims; i++)
{
_globalWorkOffset[i] = globalWorkOffset[i];
}
}
_numOfGlobalWorkItems = _globalSize[0]*_globalSize[1]*_globalSize[2]; for (i = 0; i < MAX_DIMS; i++)
{
_enqueuedLocalSize[i] = 1;
}
DataContainerAttrib temp = {{0, 0, 0}}; if (localSize)
{
_localSize_IsNull = false;
for (i = 0; i < _dims; i++)
{
_enqueuedLocalSize[i] = _localSize[i];
}
}
// array with results from each region if (reqdWorkGroupSize)
_resultsRegionArray.resize(NUMBER_OF_REGIONS, temp); {
_referenceRegionArray.resize(NUMBER_OF_REGIONS, temp); for (i = 0; i < _dims; i++)
{
_reqdWorkGroupSize[i] = reqdWorkGroupSize[i];
}
for (i = _dims; i < MAX_DIMS; i++)
{
_reqdWorkGroupSize[i] = 1;
}
}
else
{
_reqdWorkGroupSize[0] = 0;
_reqdWorkGroupSize[1] = 0;
_reqdWorkGroupSize[2] = 0;
}
_testRange = Range::ALL;
_numOfGlobalWorkItems = _globalSize[0] * _globalSize[1] * _globalSize[2];
DataContainerAttrib temp = { { 0, 0, 0 } };
// array with results from each region
_resultsRegionArray.resize(NUMBER_OF_REGIONS, temp);
_referenceRegionArray.resize(NUMBER_OF_REGIONS, temp);
} }
TestNonUniformWorkGroup::~TestNonUniformWorkGroup () { TestNonUniformWorkGroup::~TestNonUniformWorkGroup () {
@@ -482,7 +541,7 @@ int TestNonUniformWorkGroup::prepareDevice () {
if(_localSize_IsNull == false) if(_localSize_IsNull == false)
calculateExpectedValues(); calculateExpectedValues();
std::string buildOptions = BUILD_CL_STD_2_0; std::string buildOptions{};
if(_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) { if(_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) {
std::ostringstream tmp(" "); std::ostringstream tmp(" ");
tmp << " -D RWGSX=" << _reqdWorkGroupSize[0] tmp << " -D RWGSX=" << _reqdWorkGroupSize[0]
@@ -721,42 +780,50 @@ int TestNonUniformWorkGroup::runKernel () {
return 0; return 0;
} }
void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize, void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims,
const size_t *localSize, int range) { size_t *globalSize,
runTestNonUniformWorkGroup (dims, globalSize, localSize, NULL, NULL, range); const size_t *localSize,
int range)
{
runTestNonUniformWorkGroup(dims, globalSize, localSize, NULL, NULL, range);
} }
void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize, void SubTestExecutor::runTestNonUniformWorkGroup(
const size_t *localSize, const size_t *globalWorkOffset, const cl_uint dims, size_t *globalSize, const size_t *localSize,
const size_t *reqdWorkGroupSize, int range) { const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize, int range)
{
int err; int err;
++_overallCounter; ++_overallCounter;
TestNonUniformWorkGroup test (_device, _context, _queue, dims, globalSize, localSize, TestNonUniformWorkGroup test(_device, _context, _queue, dims, globalSize,
NULL, globalWorkOffset, reqdWorkGroupSize); localSize, NULL, globalWorkOffset,
reqdWorkGroupSize);
test.setTestRange(range); test.setTestRange(range);
err = test.prepareDevice(); err = test.prepareDevice();
if (err) { if (err)
log_error ("Error: prepare device\n"); {
++_failCounter; log_error("Error: prepare device\n");
return; ++_failCounter;
} return;
}
err = test.runKernel(); err = test.runKernel();
if (err) { if (err)
log_error ("Error: run kernel\n"); {
++_failCounter; log_error("Error: run kernel\n");
return; ++_failCounter;
} return;
}
err = test.verifyResults(); err = test.verifyResults();
if (err) { if (err)
log_error ("Error: verify results\n"); {
++_failCounter; log_error("Error: verify results\n");
return; ++_failCounter;
} return;
}
} }
int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) { int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
@@ -764,7 +831,7 @@ int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper testKernel; clKernelWrapper testKernel;
std::string buildOptions = BUILD_CL_STD_2_0; std::string buildOptions{};
if (testRange & Range::BASIC) if (testRange & Range::BASIC)
buildOptions += " -D TESTBASIC"; buildOptions += " -D TESTBASIC";

View File

@@ -25,8 +25,6 @@
#define NUMBER_OF_REGIONS 8 #define NUMBER_OF_REGIONS 8
#define BUILD_CL_STD_2_0 "-cl-std=CL2.0"
#define MAX_DIMS 3 #define MAX_DIMS 3
// This structure reflects data received from kernel. // This structure reflects data received from kernel.
@@ -62,18 +60,21 @@ std::string showArray (const size_t *arr, cl_uint dims);
// Main class responsible for testing // Main class responsible for testing
class TestNonUniformWorkGroup { class TestNonUniformWorkGroup {
public: public:
TestNonUniformWorkGroup(const cl_device_id &device,
const cl_context &context,
const cl_command_queue &queue, const cl_uint dims,
size_t *globalSize, const size_t *localSize,
const size_t *buffersSize,
const size_t *globalWorkOffset,
const size_t *reqdWorkGroupSize = NULL);
TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context, ~TestNonUniformWorkGroup();
const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize,
const size_t *localSize, const size_t *buffersSize, const size_t *globalWorkOffset,
const size_t *reqdWorkGroupSize=NULL);
~TestNonUniformWorkGroup (); static size_t getMaxLocalWorkgroupSize(const cl_device_id &device);
static void setMaxLocalWorkgroupSize(size_t workGroupSize)
static size_t getMaxLocalWorkgroupSize (const cl_device_id &device); {
static void setMaxLocalWorkgroupSize (size_t workGroupSize) { TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize;
TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize; }
}
static void enableStrictMode (bool state); static void enableStrictMode (bool state);
void setTestRange (int range) {_testRange = range;} void setTestRange (int range) {_testRange = range;}
@@ -126,12 +127,13 @@ public:
SubTestExecutor(const cl_device_id &device, const cl_context &context, const cl_command_queue &queue) SubTestExecutor(const cl_device_id &device, const cl_context &context, const cl_command_queue &queue)
: _device (device), _context (context), _queue (queue), _failCounter (0), _overallCounter (0) {} : _device (device), _context (context), _queue (queue), _failCounter (0), _overallCounter (0) {}
void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize, void runTestNonUniformWorkGroup(const cl_uint dims, size_t *globalSize,
const size_t *localSize, int range); const size_t *localSize, int range);
void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize, void runTestNonUniformWorkGroup(const cl_uint dims, size_t *globalSize,
const size_t *localSize, const size_t *globalWorkOffset, const size_t *localSize,
const size_t *reqdWorkGroupSize, int range); const size_t *globalWorkOffset,
const size_t *reqdWorkGroupSize, int range);
int calculateWorkGroupSize(size_t &maxWgSize, int testRange); int calculateWorkGroupSize(size_t &maxWgSize, int testRange);
int status(); int status();