cl20: Khronos Bug 16080 Fix local work size limit.

Problem: Some tests assume that all local work-items can be used in a
single dimension of an NDRange.

Spec References: OpenCL 2.0 API specification r19, table 4.3 (device
queries), CL_DEVICE_MAX_WORK_ITEM_SIZES.

Solution: The maximum local work size is clamped to the device's
per-dimension limits: to the first dimension's limit when a 1D NDRange
is executed, or to each dimension's limit otherwise.

Test Suite Affected: atomics, non_uniform_work_group, and workgroups.

Side Effects: None

Change-Id: I2e8179ca15c2c090f47ea84d1d3c109dd69ec185
This commit is contained in:
Samuel Pauls
2016-10-05 18:49:15 -04:00
committed by Kévin Petit
parent e0d7ab2187
commit 627c180a31
12 changed files with 129 additions and 72 deletions

View File

@@ -968,6 +968,46 @@ int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_
}
/*
 * Computes the largest work-group size usable for a 1D NDRange of `kernel`
 * on `device`.
 *
 * The result is the kernel's CL_KERNEL_WORK_GROUP_SIZE, capped by the first
 * entry of the device's CL_DEVICE_MAX_WORK_ITEM_SIZES.
 *
 * device  - device to query.
 * kernel  - kernel whose work-group limit is queried.
 * outSize - receives the resulting size; written only on success.
 *
 * Returns 0 on success, -1 if the temporary buffer cannot be allocated, or
 * the OpenCL error code if the CL_DEVICE_MAX_WORK_ITEM_SIZES query fails.
 * NOTE(review): the two test_error() checks are assumed to log and return
 * from this function on failure -- confirm against the harness macro.
 */
extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize )
{
    size_t kernelLimit;
    cl_uint dimCount;
    size_t *perDimLimits;
    int status;

    // Upper bound imposed by the kernel/device combination.
    status = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( kernelLimit ), &kernelLimit, NULL );
    test_error( status, "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed" );

    // The per-dimension query below needs room for one entry per dimension.
    status = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( dimCount ), &dimCount, NULL );
    test_error( status, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed" );

    perDimLimits = (size_t*)malloc( dimCount * sizeof( size_t ) );
    if( perDimLimits == NULL )
    {
        log_error( "Unable to allocate maxWgSizePerDim\n" );
        return -1;
    }

    status = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, dimCount * sizeof( size_t ), perDimLimits, NULL );
    if( status == CL_SUCCESS )
    {
        // Only the first dimension matters for a 1D range: take the smaller limit.
        *outSize = ( kernelLimit > perDimLimits[0] ) ? perDimLimits[0] : kernelLimit;
        free( perDimLimits );
        return 0;
    }

    log_error( "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n" );
    free( perDimLimits );
    return status;
}
int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
size_t globalThreadSize, size_t *outMaxSize )
{

View File

@@ -120,6 +120,9 @@ extern int get_device_version( cl_device_id id, size_t* major, size_t* minor);
/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
/* Helper to obtain the biggest allowed 1D work group size on a given device.
 * On success, stores in *outSize the kernel's CL_KERNEL_WORK_GROUP_SIZE capped
 * by the device's first-dimension CL_DEVICE_MAX_WORK_ITEM_SIZES entry, and
 * returns 0. */
extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize );
/* Helper to determine if an extension is supported by a device */
extern int is_extension_available( cl_device_id device, const char *extensionName );

View File

@@ -205,6 +205,12 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL );
test_error( error, "Unable to obtain max work group size for device and kernel combo" );
// "workSize" is limited to that of the first dimension as only a 1DRange is executed.
if( maxSizes[0] < workSize )
{
workSize = maxSizes[0];
}
threadSize = groupSize = workSize;
}

View File

@@ -455,6 +455,7 @@ void TestNonUniformWorkGroup::enableStrictMode(bool state) {
int TestNonUniformWorkGroup::prepareDevice () {
int err;
cl_uint device_max_dimensions;
cl_uint i;
if (_globalSize[0] == 0)
{
@@ -462,9 +463,6 @@ int TestNonUniformWorkGroup::prepareDevice () {
return -1;
}
if(_localSize_IsNull == false)
calculateExpectedValues();
err = clGetDeviceInfo(_device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
sizeof(device_max_dimensions), &device_max_dimensions, NULL);
test_error(err, "clGetDeviceInfo failed");
@@ -474,6 +472,16 @@ int TestNonUniformWorkGroup::prepareDevice () {
test_error(err, "clGetDeviceInfo failed");
// Trim the local size to the limitations of what the device supports in each dimension.
for (i = 0; i < _dims; i++) {
if(_enqueuedLocalSize[i] > _maxWorkItemSizes[i]) {
_enqueuedLocalSize[i] = _maxWorkItemSizes[i];
}
}
if(_localSize_IsNull == false)
calculateExpectedValues();
std::string buildOptions = BUILD_CL_STD_2_0;
if(_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) {
std::ostringstream tmp(" ");

View File

@@ -83,9 +83,9 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;

View File

@@ -83,9 +83,9 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;

View File

@@ -178,9 +178,9 @@ test_work_group_broadcast_1D(cl_device_id device, cl_context context, cl_command
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;

View File

@@ -180,9 +180,9 @@ test_work_group_reduce_add_int(cl_device_id device, cl_context context, cl_comma
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -281,9 +281,9 @@ test_work_group_reduce_add_uint(cl_device_id device, cl_context context, cl_comm
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -381,9 +381,9 @@ test_work_group_reduce_add_long(cl_device_id device, cl_context context, cl_comm
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -482,9 +482,9 @@ test_work_group_reduce_add_ulong(cl_device_id device, cl_context context, cl_com
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;

View File

@@ -188,9 +188,9 @@ test_work_group_scan_exclusive_add_int(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -289,9 +289,9 @@ test_work_group_scan_exclusive_add_uint(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -389,9 +389,9 @@ test_work_group_scan_exclusive_add_long(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -490,9 +490,9 @@ test_work_group_scan_exclusive_add_ulong(cl_device_id device, cl_context context
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;

View File

@@ -177,9 +177,9 @@ test_work_group_scan_inclusive_add_int(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -278,9 +278,9 @@ test_work_group_scan_inclusive_add_uint(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -378,9 +378,9 @@ test_work_group_scan_inclusive_add_long(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -479,9 +479,9 @@ test_work_group_scan_inclusive_add_ulong(cl_device_id device, cl_context context
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;

View File

@@ -179,9 +179,9 @@ test_work_group_scan_inclusive_max_int(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -280,9 +280,9 @@ test_work_group_scan_inclusive_max_uint(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -380,9 +380,9 @@ test_work_group_scan_inclusive_max_long(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -481,9 +481,9 @@ test_work_group_scan_inclusive_max_ulong(cl_device_id device, cl_context context
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;

View File

@@ -179,9 +179,9 @@ test_work_group_scan_inclusive_min_int(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -280,9 +280,9 @@ test_work_group_scan_inclusive_min_uint(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -380,9 +380,9 @@ test_work_group_scan_inclusive_min_long(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;
@@ -481,9 +481,9 @@ test_work_group_scan_inclusive_min_ulong(cl_device_id device, cl_context context
if (err)
return -1;
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
if (err)
return -1;
// "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
num_elements = n_elems;