mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
cl20: Khronos Bug 15745 Limit work-group sizes per dimension.
Until now, the Workgroups test computed the work-group size used for execution by querying the maximum number of work-items supported for the kernel. That query returns the overall limit on work-group size, but not the limit in each individual dimension. This change additionally restricts the work-group size to the device's per-dimension limits.

Problem: The kernel's maximum work-group size is used to execute the kernel in a single dimension, without regard for per-dimension limits.

Spec References: OpenCL 2.0 rev 19

Solution: The kernel's maximum work-group size is reduced so that it satisfies the device's per-dimension limits before the kernel is enqueued.

Test Suite Affected: Workgroups

Side Effects: None

Change-Id: I1c642af087f1934723ac7218ca9a35413ff83224
This commit is contained in:
committed by
Kévin Petit
parent
ce74e803b9
commit
e0d7ab2187
@@ -171,6 +171,7 @@ test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_comma
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -184,6 +185,14 @@ test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_comma
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
|
||||
@@ -272,6 +281,7 @@ test_work_group_reduce_max_uint(cl_device_id device, cl_context context, cl_comm
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -285,6 +295,14 @@ test_work_group_reduce_max_uint(cl_device_id device, cl_context context, cl_comm
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
@@ -372,6 +390,7 @@ test_work_group_reduce_max_long(cl_device_id device, cl_context context, cl_comm
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -385,6 +404,14 @@ test_work_group_reduce_max_long(cl_device_id device, cl_context context, cl_comm
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
|
||||
@@ -473,6 +500,7 @@ test_work_group_reduce_max_ulong(cl_device_id device, cl_context context, cl_com
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -486,6 +514,14 @@ test_work_group_reduce_max_ulong(cl_device_id device, cl_context context, cl_com
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
|
||||
|
||||
@@ -171,6 +171,7 @@ test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_comma
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -184,6 +185,14 @@ test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_comma
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
|
||||
@@ -272,6 +281,7 @@ test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_comm
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -285,6 +295,14 @@ test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_comm
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
@@ -372,6 +390,7 @@ test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_comm
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -385,6 +404,14 @@ test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_comm
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
|
||||
@@ -473,6 +500,7 @@ test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_com
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -486,6 +514,14 @@ test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_com
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
|
||||
|
||||
@@ -170,6 +170,7 @@ test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context,
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -183,6 +184,14 @@ test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context,
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
|
||||
@@ -271,6 +280,7 @@ test_work_group_scan_exclusive_max_uint(cl_device_id device, cl_context context,
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -284,6 +294,14 @@ test_work_group_scan_exclusive_max_uint(cl_device_id device, cl_context context,
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
@@ -371,6 +389,7 @@ test_work_group_scan_exclusive_max_long(cl_device_id device, cl_context context,
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -384,6 +403,14 @@ test_work_group_scan_exclusive_max_long(cl_device_id device, cl_context context,
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
|
||||
@@ -472,6 +499,7 @@ test_work_group_scan_exclusive_max_ulong(cl_device_id device, cl_context context
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -485,6 +513,14 @@ test_work_group_scan_exclusive_max_ulong(cl_device_id device, cl_context context
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
|
||||
|
||||
@@ -171,6 +171,7 @@ test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context,
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -184,6 +185,14 @@ test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context,
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
|
||||
@@ -272,6 +281,7 @@ test_work_group_scan_exclusive_min_uint(cl_device_id device, cl_context context,
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -285,6 +295,14 @@ test_work_group_scan_exclusive_min_uint(cl_device_id device, cl_context context,
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
@@ -372,6 +390,7 @@ test_work_group_scan_exclusive_min_long(cl_device_id device, cl_context context,
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -385,6 +404,14 @@ test_work_group_scan_exclusive_min_long(cl_device_id device, cl_context context,
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
|
||||
@@ -473,6 +500,7 @@ test_work_group_scan_exclusive_min_ulong(cl_device_id device, cl_context context
|
||||
void *values[2];
|
||||
size_t threads[1];
|
||||
size_t wg_size[1];
|
||||
size_t wg_sizes_per_dimension[3];
|
||||
size_t num_elements;
|
||||
int err;
|
||||
int i;
|
||||
@@ -486,6 +514,14 @@ test_work_group_scan_exclusive_min_ulong(cl_device_id device, cl_context context
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
|
||||
if (err)
|
||||
return -1;
|
||||
if(wg_sizes_per_dimension[0] < wg_size[0])
|
||||
{
|
||||
wg_size[0] = wg_sizes_per_dimension[0];
|
||||
}
|
||||
|
||||
num_elements = n_elems;
|
||||
|
||||
input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
|
||||
|
||||
Reference in New Issue
Block a user