From e0d7ab218707dfd47e7f8cd5a305f4c55c97840b Mon Sep 17 00:00:00 2001 From: Samuel Pauls Date: Mon, 11 Apr 2016 16:48:51 -0400 Subject: [PATCH] cl20: Khronos Bug 15745 Limit work-group sizes per dimension. Until now, the workgroups test computed the work-group size for execution by querying the maximum number of work-items based on the kernel. This returns the overall limit on work-group size but not the limits in each specific dimension. This change additionally restricts the work-group size to the device's dimensional limits. Problem: The kernel's maximum work-group size is used to execute it in a single dimension, without regard for dimensional limitations. Spec References: OpenCL 2.0 rev 19 Solution: The kernel's maximum work-group size is decreased such that it meets any dimensional limitations before it's enqueued. Test Suite Affected: Workgroups Side Effects: None Change-Id: I1c642af087f1934723ac7218ca9a35413ff83224 --- .../workgroups/test_wg_reduce_max.c | 36 +++++++++++++++++++ .../workgroups/test_wg_reduce_min.c | 36 +++++++++++++++++++ .../workgroups/test_wg_scan_exclusive_max.c | 36 +++++++++++++++++++ .../workgroups/test_wg_scan_exclusive_min.c | 36 +++++++++++++++++++ 4 files changed, 144 insertions(+) diff --git a/test_conformance/workgroups/test_wg_reduce_max.c b/test_conformance/workgroups/test_wg_reduce_max.c index 2af86f85..4bebe81f 100644 --- a/test_conformance/workgroups/test_wg_reduce_max.c +++ b/test_conformance/workgroups/test_wg_reduce_max.c @@ -171,6 +171,7 @@ test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_comma void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -184,6 +185,14 @@ test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_comma if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + 
if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); @@ -272,6 +281,7 @@ test_work_group_reduce_max_uint(cl_device_id device, cl_context context, cl_comm void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -285,6 +295,14 @@ test_work_group_reduce_max_uint(cl_device_id device, cl_context context, cl_comm if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements); @@ -372,6 +390,7 @@ test_work_group_reduce_max_long(cl_device_id device, cl_context context, cl_comm void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -385,6 +404,14 @@ test_work_group_reduce_max_long(cl_device_id device, cl_context context, cl_comm if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements); @@ -473,6 +500,7 @@ test_work_group_reduce_max_ulong(cl_device_id device, cl_context context, cl_com void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -486,6 +514,14 @@ test_work_group_reduce_max_ulong(cl_device_id device, cl_context context, cl_com if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + 
if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements); diff --git a/test_conformance/workgroups/test_wg_reduce_min.c b/test_conformance/workgroups/test_wg_reduce_min.c index 7b302201..582a7608 100644 --- a/test_conformance/workgroups/test_wg_reduce_min.c +++ b/test_conformance/workgroups/test_wg_reduce_min.c @@ -171,6 +171,7 @@ test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_comma void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -184,6 +185,14 @@ test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_comma if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); @@ -272,6 +281,7 @@ test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_comm void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -285,6 +295,14 @@ test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_comm if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements); @@ -372,6 +390,7 @@ test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_comm void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t 
wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -385,6 +404,14 @@ test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_comm if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements); @@ -473,6 +500,7 @@ test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_com void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -486,6 +514,14 @@ test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_com if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements); diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_max.c b/test_conformance/workgroups/test_wg_scan_exclusive_max.c index 87a870b9..5b68cada 100644 --- a/test_conformance/workgroups/test_wg_scan_exclusive_max.c +++ b/test_conformance/workgroups/test_wg_scan_exclusive_max.c @@ -170,6 +170,7 @@ test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context, void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -183,6 +184,14 @@ test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context, if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < 
wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); @@ -271,6 +280,7 @@ test_work_group_scan_exclusive_max_uint(cl_device_id device, cl_context context, void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -284,6 +294,14 @@ test_work_group_scan_exclusive_max_uint(cl_device_id device, cl_context context, if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements); @@ -371,6 +389,7 @@ test_work_group_scan_exclusive_max_long(cl_device_id device, cl_context context, void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -384,6 +403,14 @@ test_work_group_scan_exclusive_max_long(cl_device_id device, cl_context context, if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements); @@ -472,6 +499,7 @@ test_work_group_scan_exclusive_max_ulong(cl_device_id device, cl_context context void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -485,6 +513,14 @@ test_work_group_scan_exclusive_max_ulong(cl_device_id device, cl_context context if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + 
if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements); diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_min.c b/test_conformance/workgroups/test_wg_scan_exclusive_min.c index 435a8704..03ec3587 100644 --- a/test_conformance/workgroups/test_wg_scan_exclusive_min.c +++ b/test_conformance/workgroups/test_wg_scan_exclusive_min.c @@ -171,6 +171,7 @@ test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context, void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -184,6 +185,14 @@ test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context, if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); @@ -272,6 +281,7 @@ test_work_group_scan_exclusive_min_uint(cl_device_id device, cl_context context, void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -285,6 +295,14 @@ test_work_group_scan_exclusive_min_uint(cl_device_id device, cl_context context, if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements); @@ -372,6 +390,7 @@ test_work_group_scan_exclusive_min_long(cl_device_id device, cl_context context, void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t 
wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -385,6 +404,14 @@ test_work_group_scan_exclusive_min_long(cl_device_id device, cl_context context, if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements); @@ -473,6 +500,7 @@ test_work_group_scan_exclusive_min_ulong(cl_device_id device, cl_context context void *values[2]; size_t threads[1]; size_t wg_size[1]; + size_t wg_sizes_per_dimension[3]; size_t num_elements; int err; int i; @@ -486,6 +514,14 @@ test_work_group_scan_exclusive_min_ulong(cl_device_id device, cl_context context if (err) return -1; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); + if (err) + return -1; + if(wg_sizes_per_dimension[0] < wg_size[0]) + { + wg_size[0] = wg_sizes_per_dimension[0]; + } + num_elements = n_elems; input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);