Change Behaviour of non-uniform-work-group tests for OpenCL-3.0 (#877)

* Pass `-cl-std=CL3.0` as build option for OpenCL-3.0.
* For an OpenCL-3.0 driver that does not support the optional non-uniform
work-groups feature, round up the global size and run the tests with
uniform work-groups. This increases coverage and allows testing of the
`get_enqueued_local_size` builtin for uniform work-groups.
This commit is contained in:
Jack Frankland
2020-09-01 17:16:17 +02:00
committed by GitHub
parent f06e1896a8
commit d1a63f8041
2 changed files with 173 additions and 104 deletions

View File

@@ -243,12 +243,16 @@ static const char *KERNEL_FUNCTION =
NL "}" NL "}"
NL ; NL ;
TestNonUniformWorkGroup::TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context, TestNonUniformWorkGroup::TestNonUniformWorkGroup(
const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize, const size_t *localSize, const size_t *buffersSize, const cl_device_id &device, const cl_context &context,
const cl_command_queue &queue, const cl_uint dims, size_t *globalSize,
const size_t *localSize, const size_t *buffersSize,
const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize) const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize)
: _device(device), _context(context), _queue(queue), _dims (dims) { : _device(device), _context(context), _queue(queue), _dims(dims)
{
if (globalSize == NULL || dims < 1 || dims > 3) { if (globalSize == NULL || dims < 1 || dims > 3)
{
// throw std::invalid_argument("globalSize is NULL value."); // throw std::invalid_argument("globalSize is NULL value.");
// This is method of informing that parameters are wrong. // This is method of informing that parameters are wrong.
// It would be checked by prepareDevice() function. // It would be checked by prepareDevice() function.
@@ -257,46 +261,102 @@ TestNonUniformWorkGroup::TestNonUniformWorkGroup (const cl_device_id &device, co
return; return;
} }
// For OpenCL-3.0, support for non-uniform work-groups is optional. It is
// still useful to run these tests, since we can verify the behavior of the
// get_enqueued_local_size() builtin for uniform work-groups, so we round up
// the global size to ensure uniform work-groups on those 3.0 devices.
// We only need to do this when localSize is non-null; otherwise the driver
// will select a value for localSize which will be uniform on devices that
// don't support non-uniform work-groups.
if (nullptr != localSize && get_device_cl_version(device) >= Version(3, 0))
{
// Query for the non-uniform work-group support.
cl_bool are_non_uniform_sub_groups_supported{ CL_FALSE };
auto error =
clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
sizeof(are_non_uniform_sub_groups_supported),
&are_non_uniform_sub_groups_supported, nullptr);
if (error)
{
print_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT");
// This signals an error to the caller (see above).
_globalSize[0] = 0;
return;
}
// If non-uniform work-groups are not supported, round up the global
// sizes so that work-groups are uniform and we have at least one.
if (CL_FALSE == are_non_uniform_sub_groups_supported)
{
log_info(
"WARNING: Non-uniform work-groups are not supported on this "
"device.\n Running test with uniform work-groups.\n");
for (unsigned dim = 0; dim < dims; ++dim)
{
auto global_size_before = globalSize[dim];
auto global_size_rounded = global_size_before
+ (localSize[dim] - global_size_before % localSize[dim]);
globalSize[dim] = global_size_rounded;
log_info("Rounding globalSize[%d] = %d -> %d\n", dim,
global_size_before, global_size_rounded);
}
}
}
cl_uint i; cl_uint i;
_globalWorkOffset_IsNull = true; _globalWorkOffset_IsNull = true;
_localSize_IsNull = true; _localSize_IsNull = true;
setGlobalWorkgroupSize(globalSize); setGlobalWorkgroupSize(globalSize);
setLocalWorkgroupSize(globalSize, localSize); setLocalWorkgroupSize(globalSize, localSize);
for (i = _dims; i < MAX_DIMS; i++) { for (i = _dims; i < MAX_DIMS; i++)
{
_globalSize[i] = 1; _globalSize[i] = 1;
} }
for (i = 0; i < MAX_DIMS; i++) { for (i = 0; i < MAX_DIMS; i++)
{
_globalWorkOffset[i] = 0; _globalWorkOffset[i] = 0;
} }
if (globalWorkOffset) { if (globalWorkOffset)
{
_globalWorkOffset_IsNull = false; _globalWorkOffset_IsNull = false;
for (i = 0; i < _dims; i++) { for (i = 0; i < _dims; i++)
{
_globalWorkOffset[i] = globalWorkOffset[i]; _globalWorkOffset[i] = globalWorkOffset[i];
} }
} }
for (i = 0; i < MAX_DIMS; i++) { for (i = 0; i < MAX_DIMS; i++)
{
_enqueuedLocalSize[i] = 1; _enqueuedLocalSize[i] = 1;
} }
if (localSize) { if (localSize)
{
_localSize_IsNull = false; _localSize_IsNull = false;
for (i = 0; i < _dims; i++) { for (i = 0; i < _dims; i++)
{
_enqueuedLocalSize[i] = _localSize[i]; _enqueuedLocalSize[i] = _localSize[i];
} }
} }
if (reqdWorkGroupSize) { if (reqdWorkGroupSize)
for (i = 0; i < _dims; i++) { {
for (i = 0; i < _dims; i++)
{
_reqdWorkGroupSize[i] = reqdWorkGroupSize[i]; _reqdWorkGroupSize[i] = reqdWorkGroupSize[i];
} }
for (i = _dims; i < MAX_DIMS; i++) { for (i = _dims; i < MAX_DIMS; i++)
{
_reqdWorkGroupSize[i] = 1; _reqdWorkGroupSize[i] = 1;
} }
} else { }
else
{
_reqdWorkGroupSize[0] = 0; _reqdWorkGroupSize[0] = 0;
_reqdWorkGroupSize[1] = 0; _reqdWorkGroupSize[1] = 0;
_reqdWorkGroupSize[2] = 0; _reqdWorkGroupSize[2] = 0;
@@ -311,7 +371,6 @@ TestNonUniformWorkGroup::TestNonUniformWorkGroup (const cl_device_id &device, co
// array with results from each region // array with results from each region
_resultsRegionArray.resize(NUMBER_OF_REGIONS, temp); _resultsRegionArray.resize(NUMBER_OF_REGIONS, temp);
_referenceRegionArray.resize(NUMBER_OF_REGIONS, temp); _referenceRegionArray.resize(NUMBER_OF_REGIONS, temp);
} }
TestNonUniformWorkGroup::~TestNonUniformWorkGroup () { TestNonUniformWorkGroup::~TestNonUniformWorkGroup () {
@@ -482,7 +541,7 @@ int TestNonUniformWorkGroup::prepareDevice () {
if(_localSize_IsNull == false) if(_localSize_IsNull == false)
calculateExpectedValues(); calculateExpectedValues();
std::string buildOptions = BUILD_CL_STD_2_0; std::string buildOptions{};
if(_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) { if(_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) {
std::ostringstream tmp(" "); std::ostringstream tmp(" ");
tmp << " -D RWGSX=" << _reqdWorkGroupSize[0] tmp << " -D RWGSX=" << _reqdWorkGroupSize[0]
@@ -721,38 +780,46 @@ int TestNonUniformWorkGroup::runKernel () {
return 0; return 0;
} }
void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize, void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims,
const size_t *localSize, int range) { size_t *globalSize,
const size_t *localSize,
int range)
{
runTestNonUniformWorkGroup(dims, globalSize, localSize, NULL, NULL, range); runTestNonUniformWorkGroup(dims, globalSize, localSize, NULL, NULL, range);
} }
void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize, void SubTestExecutor::runTestNonUniformWorkGroup(
const size_t *localSize, const size_t *globalWorkOffset, const cl_uint dims, size_t *globalSize, const size_t *localSize,
const size_t *reqdWorkGroupSize, int range) { const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize, int range)
{
int err; int err;
++_overallCounter; ++_overallCounter;
TestNonUniformWorkGroup test (_device, _context, _queue, dims, globalSize, localSize, TestNonUniformWorkGroup test(_device, _context, _queue, dims, globalSize,
NULL, globalWorkOffset, reqdWorkGroupSize); localSize, NULL, globalWorkOffset,
reqdWorkGroupSize);
test.setTestRange(range); test.setTestRange(range);
err = test.prepareDevice(); err = test.prepareDevice();
if (err) { if (err)
{
log_error("Error: prepare device\n"); log_error("Error: prepare device\n");
++_failCounter; ++_failCounter;
return; return;
} }
err = test.runKernel(); err = test.runKernel();
if (err) { if (err)
{
log_error("Error: run kernel\n"); log_error("Error: run kernel\n");
++_failCounter; ++_failCounter;
return; return;
} }
err = test.verifyResults(); err = test.verifyResults();
if (err) { if (err)
{
log_error("Error: verify results\n"); log_error("Error: verify results\n");
++_failCounter; ++_failCounter;
return; return;
@@ -764,7 +831,7 @@ int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper testKernel; clKernelWrapper testKernel;
std::string buildOptions = BUILD_CL_STD_2_0; std::string buildOptions{};
if (testRange & Range::BASIC) if (testRange & Range::BASIC)
buildOptions += " -D TESTBASIC"; buildOptions += " -D TESTBASIC";

View File

@@ -25,8 +25,6 @@
#define NUMBER_OF_REGIONS 8 #define NUMBER_OF_REGIONS 8
#define BUILD_CL_STD_2_0 "-cl-std=CL2.0"
#define MAX_DIMS 3 #define MAX_DIMS 3
// This structure reflects data received from kernel. // This structure reflects data received from kernel.
@@ -62,16 +60,19 @@ std::string showArray (const size_t *arr, cl_uint dims);
// Main class responsible for testing // Main class responsible for testing
class TestNonUniformWorkGroup { class TestNonUniformWorkGroup {
public: public:
TestNonUniformWorkGroup(const cl_device_id &device,
TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context, const cl_context &context,
const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize, const cl_command_queue &queue, const cl_uint dims,
const size_t *localSize, const size_t *buffersSize, const size_t *globalWorkOffset, size_t *globalSize, const size_t *localSize,
const size_t *buffersSize,
const size_t *globalWorkOffset,
const size_t *reqdWorkGroupSize = NULL); const size_t *reqdWorkGroupSize = NULL);
~TestNonUniformWorkGroup(); ~TestNonUniformWorkGroup();
static size_t getMaxLocalWorkgroupSize(const cl_device_id &device); static size_t getMaxLocalWorkgroupSize(const cl_device_id &device);
static void setMaxLocalWorkgroupSize (size_t workGroupSize) { static void setMaxLocalWorkgroupSize(size_t workGroupSize)
{
TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize; TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize;
} }
static void enableStrictMode (bool state); static void enableStrictMode (bool state);
@@ -126,11 +127,12 @@ public:
SubTestExecutor(const cl_device_id &device, const cl_context &context, const cl_command_queue &queue) SubTestExecutor(const cl_device_id &device, const cl_context &context, const cl_command_queue &queue)
: _device (device), _context (context), _queue (queue), _failCounter (0), _overallCounter (0) {} : _device (device), _context (context), _queue (queue), _failCounter (0), _overallCounter (0) {}
void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize, void runTestNonUniformWorkGroup(const cl_uint dims, size_t *globalSize,
const size_t *localSize, int range); const size_t *localSize, int range);
void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize, void runTestNonUniformWorkGroup(const cl_uint dims, size_t *globalSize,
const size_t *localSize, const size_t *globalWorkOffset, const size_t *localSize,
const size_t *globalWorkOffset,
const size_t *reqdWorkGroupSize, int range); const size_t *reqdWorkGroupSize, int range);
int calculateWorkGroupSize(size_t &maxWgSize, int testRange); int calculateWorkGroupSize(size_t &maxWgSize, int testRange);