From 627c180a313be0586cf463eb0c7cfaf08a7b6ae5 Mon Sep 17 00:00:00 2001 From: Samuel Pauls Date: Wed, 5 Oct 2016 18:49:15 -0400 Subject: [PATCH] cl20: Khronos Bug 16080 Fix local work size limit. Problem: Some tests assume that all local work-items can be used in a single dimension of an NDRange. Spec References: OpenCL C 2.0 r19, table 4.3, CL_DEVICE_MAX_WORK_ITEM_SIZES. Solution: The overall maximum local work size is trimmed to that of an NDRange's first dimension or all dimensions, as appropriate. Test Suite Affected: atomics, non_uniform_work_group, and workgroups. Side Effects: None Change-Id: I2e8179ca15c2c090f47ea84d1d3c109dd69ec185 --- test_common/harness/kernelHelpers.c | 40 +++++++++++++++++++ test_common/harness/kernelHelpers.h | 3 ++ test_conformance/atomics/test_atomics.cpp | 6 +++ .../TestNonUniformWorkGroup.cpp | 14 +++++-- test_conformance/workgroups/test_wg_all.c | 6 +-- test_conformance/workgroups/test_wg_any.c | 6 +-- .../workgroups/test_wg_broadcast.c | 6 +-- test_conformance/workgroups/test_wg_reduce.c | 24 +++++------ .../workgroups/test_wg_scan_exclusive_add.c | 24 +++++------ .../workgroups/test_wg_scan_inclusive_add.c | 24 +++++------ .../workgroups/test_wg_scan_inclusive_max.c | 24 +++++------ .../workgroups/test_wg_scan_inclusive_min.c | 24 +++++------ 12 files changed, 129 insertions(+), 72 deletions(-) diff --git a/test_common/harness/kernelHelpers.c b/test_common/harness/kernelHelpers.c index ee7c5c70..29d6f18c 100644 --- a/test_common/harness/kernelHelpers.c +++ b/test_common/harness/kernelHelpers.c @@ -968,6 +968,46 @@ int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_ } +extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize ) +{ + cl_uint maxDim; + size_t maxWgSize; + size_t *maxWgSizePerDim; + int error; + + error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( size_t ), &maxWgSize, NULL ); + test_error( error, "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed" ); + + error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( cl_uint ), &maxDim, NULL ); + test_error( error, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed" ); + maxWgSizePerDim = (size_t*)malloc( maxDim * sizeof( size_t ) ); + if( !maxWgSizePerDim ) + { + log_error( "Unable to allocate maxWgSizePerDim\n" ); + return -1; + } + + error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, maxDim * sizeof( size_t ), maxWgSizePerDim, NULL ); + if( error != CL_SUCCESS) + { + log_error( "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n" ); + free( maxWgSizePerDim ); + return error; + } + + // "maxWgSize" is limited to that of the first dimension. + if( maxWgSize > maxWgSizePerDim[0] ) + { + maxWgSize = maxWgSizePerDim[0]; + } + + free( maxWgSizePerDim ); + + *outSize = maxWgSize; + return 0; +} + + int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outMaxSize ) { diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h index 4c85d242..50bdce02 100644 --- a/test_common/harness/kernelHelpers.h +++ b/test_common/harness/kernelHelpers.h @@ -120,6 +120,9 @@ extern int get_device_version( cl_device_id id, size_t* major, size_t* minor); /* Helper to obtain the biggest allowed work group size for all the devices in a given group */ extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits ); +/* Helper to obtain the biggest allowed 1D work group size on a given device */ +extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize ); + /* Helper to determine if an extension is supported by a device */ extern int is_extension_available( cl_device_id device, const char *extensionName ); diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp index 06941e58..5e03b70e 100644 --- a/test_conformance/atomics/test_atomics.cpp +++ b/test_conformance/atomics/test_atomics.cpp @@ -205,6 +205,12 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL ); test_error( error, "Unable to obtain max work group size for device and kernel combo" ); + // "workSize" is limited to that of the first dimension as only a 1DRange is executed. + if( maxSizes[0] < workSize ) + { + workSize = maxSizes[0]; + } + threadSize = groupSize = workSize; } diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp index 8173377b..de041c25 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp @@ -455,6 +455,7 @@ void TestNonUniformWorkGroup::enableStrictMode(bool state) { int TestNonUniformWorkGroup::prepareDevice () { int err; cl_uint device_max_dimensions; + cl_uint i; if (_globalSize[0] == 0) { @@ -462,9 +463,6 @@ int TestNonUniformWorkGroup::prepareDevice () { return -1; } - if(_localSize_IsNull == false) - calculateExpectedValues(); - err = clGetDeviceInfo(_device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(device_max_dimensions), &device_max_dimensions, NULL); test_error(err, "clGetDeviceInfo failed"); @@ -474,6 +472,16 @@ int TestNonUniformWorkGroup::prepareDevice () { test_error(err, "clGetDeviceInfo failed"); + // Trim the local size to the limitations of what the device supports in each dimension. + for (i = 0; i < _dims; i++) { + if(_enqueuedLocalSize[i] > _maxWorkItemSizes[i]) { + _enqueuedLocalSize[i] = _maxWorkItemSizes[i]; + } + } + + if(_localSize_IsNull == false) + calculateExpectedValues(); + std::string buildOptions = BUILD_CL_STD_2_0; if(_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) { std::ostringstream tmp(" "); diff --git a/test_conformance/workgroups/test_wg_all.c b/test_conformance/workgroups/test_wg_all.c index da775d6b..c0237f99 100644 --- a/test_conformance/workgroups/test_wg_all.c +++ b/test_conformance/workgroups/test_wg_all.c @@ -83,9 +83,9 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; diff --git a/test_conformance/workgroups/test_wg_any.c b/test_conformance/workgroups/test_wg_any.c index e843a5b3..a5c14f44 100644 --- a/test_conformance/workgroups/test_wg_any.c +++ b/test_conformance/workgroups/test_wg_any.c @@ -83,9 +83,9 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; diff --git a/test_conformance/workgroups/test_wg_broadcast.c b/test_conformance/workgroups/test_wg_broadcast.c index 6a49aa21..c9986d9f 100644 --- a/test_conformance/workgroups/test_wg_broadcast.c +++ b/test_conformance/workgroups/test_wg_broadcast.c @@ -178,9 +178,9 @@ test_work_group_broadcast_1D(cl_device_id device, cl_context context, cl_command if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; diff --git a/test_conformance/workgroups/test_wg_reduce.c b/test_conformance/workgroups/test_wg_reduce.c index 5b7c0f85..0f1fa920 100644 --- a/test_conformance/workgroups/test_wg_reduce.c +++ b/test_conformance/workgroups/test_wg_reduce.c @@ -180,9 +180,9 @@ test_work_group_reduce_add_int(cl_device_id device, cl_context context, cl_comma if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -281,9 +281,9 @@ test_work_group_reduce_add_uint(cl_device_id device, cl_context context, cl_comm if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -381,9 +381,9 @@ test_work_group_reduce_add_long(cl_device_id device, cl_context context, cl_comm if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -482,9 +482,9 @@ test_work_group_reduce_add_ulong(cl_device_id device, cl_context context, cl_com if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_add.c b/test_conformance/workgroups/test_wg_scan_exclusive_add.c index 99d9273e..e82ac4c9 100644 --- a/test_conformance/workgroups/test_wg_scan_exclusive_add.c +++ b/test_conformance/workgroups/test_wg_scan_exclusive_add.c @@ -188,9 +188,9 @@ test_work_group_scan_exclusive_add_int(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -289,9 +289,9 @@ test_work_group_scan_exclusive_add_uint(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -389,9 +389,9 @@ test_work_group_scan_exclusive_add_long(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -490,9 +490,9 @@ test_work_group_scan_exclusive_add_ulong(cl_device_id device, cl_context context if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_add.c b/test_conformance/workgroups/test_wg_scan_inclusive_add.c index c64d8047..1ac2f7f8 100644 --- a/test_conformance/workgroups/test_wg_scan_inclusive_add.c +++ b/test_conformance/workgroups/test_wg_scan_inclusive_add.c @@ -177,9 +177,9 @@ test_work_group_scan_inclusive_add_int(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -278,9 +278,9 @@ test_work_group_scan_inclusive_add_uint(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -378,9 +378,9 @@ test_work_group_scan_inclusive_add_long(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -479,9 +479,9 @@ test_work_group_scan_inclusive_add_ulong(cl_device_id device, cl_context context if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_max.c b/test_conformance/workgroups/test_wg_scan_inclusive_max.c index 2b209886..bb1e9961 100644 --- a/test_conformance/workgroups/test_wg_scan_inclusive_max.c +++ b/test_conformance/workgroups/test_wg_scan_inclusive_max.c @@ -179,9 +179,9 @@ test_work_group_scan_inclusive_max_int(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -280,9 +280,9 @@ test_work_group_scan_inclusive_max_uint(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -380,9 +380,9 @@ test_work_group_scan_inclusive_max_long(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -481,9 +481,9 @@ test_work_group_scan_inclusive_max_ulong(cl_device_id device, cl_context context if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_min.c b/test_conformance/workgroups/test_wg_scan_inclusive_min.c index 89012076..707f04d9 100644 --- a/test_conformance/workgroups/test_wg_scan_inclusive_min.c +++ b/test_conformance/workgroups/test_wg_scan_inclusive_min.c @@ -179,9 +179,9 @@ test_work_group_scan_inclusive_min_int(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -280,9 +280,9 @@ test_work_group_scan_inclusive_min_uint(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -380,9 +380,9 @@ test_work_group_scan_inclusive_min_long(cl_device_id device, cl_context context, if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems; @@ -481,9 +481,9 @@ test_work_group_scan_inclusive_min_ulong(cl_device_id device, cl_context context if (err) return -1; - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; + // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); num_elements = n_elems;