cl20: Khronos Bug 15745 Limit work-group sizes per dimension.

Until now, the workgroups test computed the work-group size for
execution by querying the maximum number of work-items supported for the
kernel. That query returns the overall limit on the work-group size but
not the limit in each individual dimension. This change additionally
restricts the work-group size to the device's per-dimension limit
(CL_DEVICE_MAX_WORK_ITEM_SIZES).

Problem: The kernel's maximum work-group size is used to execute the
kernel in a single dimension, without regard for the device's limit on
the work-group size in that dimension.

Spec References: OpenCL 2.0 rev 19

Solution: Before the kernel is enqueued, its maximum work-group size is
reduced, if necessary, so that it does not exceed the device's limit for
the dimension in which it is executed.

Test Suite Affected: Workgroups

Side Effects: None

Change-Id: I1c642af087f1934723ac7218ca9a35413ff83224
This commit is contained in:
Samuel Pauls
2016-04-11 16:48:51 -04:00
committed by Kévin Petit
parent ce74e803b9
commit e0d7ab2187
4 changed files with 144 additions and 0 deletions

View File

@@ -171,6 +171,7 @@ test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_comma
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -184,6 +185,14 @@ test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_comma
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
@@ -272,6 +281,7 @@ test_work_group_reduce_max_uint(cl_device_id device, cl_context context, cl_comm
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -285,6 +295,14 @@ test_work_group_reduce_max_uint(cl_device_id device, cl_context context, cl_comm
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
@@ -372,6 +390,7 @@ test_work_group_reduce_max_long(cl_device_id device, cl_context context, cl_comm
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -385,6 +404,14 @@ test_work_group_reduce_max_long(cl_device_id device, cl_context context, cl_comm
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
@@ -473,6 +500,7 @@ test_work_group_reduce_max_ulong(cl_device_id device, cl_context context, cl_com
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -486,6 +514,14 @@ test_work_group_reduce_max_ulong(cl_device_id device, cl_context context, cl_com
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);

View File

@@ -171,6 +171,7 @@ test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_comma
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -184,6 +185,14 @@ test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_comma
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
@@ -272,6 +281,7 @@ test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_comm
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -285,6 +295,14 @@ test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_comm
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
@@ -372,6 +390,7 @@ test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_comm
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -385,6 +404,14 @@ test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_comm
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
@@ -473,6 +500,7 @@ test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_com
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -486,6 +514,14 @@ test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_com
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);

View File

@@ -170,6 +170,7 @@ test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context,
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -183,6 +184,14 @@ test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
@@ -271,6 +280,7 @@ test_work_group_scan_exclusive_max_uint(cl_device_id device, cl_context context,
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -284,6 +294,14 @@ test_work_group_scan_exclusive_max_uint(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
@@ -371,6 +389,7 @@ test_work_group_scan_exclusive_max_long(cl_device_id device, cl_context context,
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -384,6 +403,14 @@ test_work_group_scan_exclusive_max_long(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
@@ -472,6 +499,7 @@ test_work_group_scan_exclusive_max_ulong(cl_device_id device, cl_context context
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -485,6 +513,14 @@ test_work_group_scan_exclusive_max_ulong(cl_device_id device, cl_context context
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);

View File

@@ -171,6 +171,7 @@ test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context,
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -184,6 +185,14 @@ test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
@@ -272,6 +281,7 @@ test_work_group_scan_exclusive_min_uint(cl_device_id device, cl_context context,
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -285,6 +295,14 @@ test_work_group_scan_exclusive_min_uint(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
@@ -372,6 +390,7 @@ test_work_group_scan_exclusive_min_long(cl_device_id device, cl_context context,
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -385,6 +404,14 @@ test_work_group_scan_exclusive_min_long(cl_device_id device, cl_context context,
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
@@ -473,6 +500,7 @@ test_work_group_scan_exclusive_min_ulong(cl_device_id device, cl_context context
void *values[2];
size_t threads[1];
size_t wg_size[1];
size_t wg_sizes_per_dimension[3];
size_t num_elements;
int err;
int i;
@@ -486,6 +514,14 @@ test_work_group_scan_exclusive_min_ulong(cl_device_id device, cl_context context
if (err)
return -1;
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
if (err)
return -1;
if(wg_sizes_per_dimension[0] < wg_size[0])
{
wg_size[0] = wg_sizes_per_dimension[0];
}
num_elements = n_elems;
input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);