mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Use CL_KERNEL_WORK_GROUP_SIZE more often (#2435)
Drivers _may_ choose to advertise values for `CL_DEVICE_MAX_WORK_GROUP_SIZE` or `CL_DEVICE_MAX_WORK_ITEM_SIZES` that kernels without a `reqd_work_group_size` cannot be launched with. The CTS should therefore make sure that the local_size passed to `clEnqueueNDRangeKernel` does not exceed `CL_KERNEL_WORK_GROUP_SIZE`. This fixes it up in the two places where I've noticed this not happening.
This commit is contained in:
@@ -107,21 +107,6 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
|
||||
threads[1] = h;
|
||||
threads[2] = d;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
||||
3 * sizeof(size_t), (size_t *)localThreads, NULL);
|
||||
test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed");
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
|
||||
&maxWorkgroupSize, NULL);
|
||||
test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
|
||||
|
||||
localThreads[0] =
|
||||
std::min({ localThreads[0], threads[0], maxWorkgroupSize });
|
||||
localThreads[1] = std::min(
|
||||
{ localThreads[1], threads[1], maxWorkgroupSize / localThreads[0] });
|
||||
localThreads[2] =
|
||||
std::min({ localThreads[2], threads[2],
|
||||
maxWorkgroupSize / (localThreads[0] * localThreads[1]) });
|
||||
|
||||
clSamplerWrapper sampler = clCreateSampler(
|
||||
context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
|
||||
test_error(err, "clCreateSampler failed");
|
||||
@@ -143,6 +128,21 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
|
||||
&read3d_kernel_code, "read3d");
|
||||
test_error(err, "create_single_kernel_helper failed");
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
||||
3 * sizeof(size_t), (size_t *)localThreads, NULL);
|
||||
test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed");
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
|
||||
sizeof(size_t), &maxWorkgroupSize, NULL);
|
||||
test_error(err, "clGetDeviceInfo(CL_KERNEL_WORK_GROUP_SIZE) failed\n");
|
||||
|
||||
localThreads[0] =
|
||||
std::min({ localThreads[0], threads[0], maxWorkgroupSize });
|
||||
localThreads[1] = std::min(
|
||||
{ localThreads[1], threads[1], maxWorkgroupSize / localThreads[0] });
|
||||
localThreads[2] =
|
||||
std::min({ localThreads[2], threads[2],
|
||||
maxWorkgroupSize / (localThreads[0] * localThreads[1]) });
|
||||
|
||||
// create kernel args object and set arg values.
|
||||
// set the args values
|
||||
err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]);
|
||||
|
||||
Reference in New Issue
Block a user