mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Use CL_KERNEL_WORK_GROUP_SIZE more often (#2435)
Drivers _may_ choose to advertise values for `CL_DEVICE_MAX_WORK_GROUP_SIZE` or `CL_DEVICE_MAX_WORK_ITEM_SIZES` that kernels without a `reqd_work_group_size` are not able to be launched with. The CTS should therefore make sure that the local_size passed to `clEnqueueNDRangeKernel` does not exceed `CL_KERNEL_WORK_GROUP_SIZE`. This fixes it up in two places where I've noticed this not happening.
This commit is contained in:
@@ -108,7 +108,11 @@ REGISTER_TEST_VERSION(sub_group_dispatch, Version(2, 1))
|
|||||||
nullptr);
|
nullptr);
|
||||||
test_error(error, "clGetDeviceInfo failed");
|
test_error(error, "clGetDeviceInfo failed");
|
||||||
|
|
||||||
max_local = max_work_item_sizes[0];
|
error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
|
||||||
|
sizeof(max_local), &max_local, nullptr);
|
||||||
|
test_error(error, "clGetKernelWorkGroupInfo failed");
|
||||||
|
|
||||||
|
max_local = std::min(max_local, max_work_item_sizes[0]);
|
||||||
|
|
||||||
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
|
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
|
||||||
(void *)&platform, NULL);
|
(void *)&platform, NULL);
|
||||||
|
|||||||
@@ -107,21 +107,6 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
|
|||||||
threads[1] = h;
|
threads[1] = h;
|
||||||
threads[2] = d;
|
threads[2] = d;
|
||||||
|
|
||||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
|
||||||
3 * sizeof(size_t), (size_t *)localThreads, NULL);
|
|
||||||
test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed");
|
|
||||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
|
|
||||||
&maxWorkgroupSize, NULL);
|
|
||||||
test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
|
|
||||||
|
|
||||||
localThreads[0] =
|
|
||||||
std::min({ localThreads[0], threads[0], maxWorkgroupSize });
|
|
||||||
localThreads[1] = std::min(
|
|
||||||
{ localThreads[1], threads[1], maxWorkgroupSize / localThreads[0] });
|
|
||||||
localThreads[2] =
|
|
||||||
std::min({ localThreads[2], threads[2],
|
|
||||||
maxWorkgroupSize / (localThreads[0] * localThreads[1]) });
|
|
||||||
|
|
||||||
clSamplerWrapper sampler = clCreateSampler(
|
clSamplerWrapper sampler = clCreateSampler(
|
||||||
context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
|
context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
|
||||||
test_error(err, "clCreateSampler failed");
|
test_error(err, "clCreateSampler failed");
|
||||||
@@ -143,6 +128,21 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
|
|||||||
&read3d_kernel_code, "read3d");
|
&read3d_kernel_code, "read3d");
|
||||||
test_error(err, "create_single_kernel_helper failed");
|
test_error(err, "create_single_kernel_helper failed");
|
||||||
|
|
||||||
|
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
||||||
|
3 * sizeof(size_t), (size_t *)localThreads, NULL);
|
||||||
|
test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed");
|
||||||
|
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
|
||||||
|
sizeof(size_t), &maxWorkgroupSize, NULL);
|
||||||
|
test_error(err, "clGetDeviceInfo(CL_KERNEL_WORK_GROUP_SIZE) failed\n");
|
||||||
|
|
||||||
|
localThreads[0] =
|
||||||
|
std::min({ localThreads[0], threads[0], maxWorkgroupSize });
|
||||||
|
localThreads[1] = std::min(
|
||||||
|
{ localThreads[1], threads[1], maxWorkgroupSize / localThreads[0] });
|
||||||
|
localThreads[2] =
|
||||||
|
std::min({ localThreads[2], threads[2],
|
||||||
|
maxWorkgroupSize / (localThreads[0] * localThreads[1]) });
|
||||||
|
|
||||||
// create kernel args object and set arg values.
|
// create kernel args object and set arg values.
|
||||||
// set the args values
|
// set the args values
|
||||||
err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]);
|
err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]);
|
||||||
|
|||||||
Reference in New Issue
Block a user