Clamp local work sizes by the kernel's own work-group limit.

test_sub_group_dispatch.cpp: max_local is now obtained from
clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE) and then limited to
max_work_item_sizes[0], instead of being taken from max_work_item_sizes[0]
alone.

execute_multipass.cpp: the localThreads computation in run_kernel uses
clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE) in place of
clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE). The query takes the kernel
object, so the whole computation is relocated to just after the
create_single_kernel_helper call for "read3d".

diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp
index 70a78f4a..c1b88be5 100644
--- a/test_conformance/api/test_sub_group_dispatch.cpp
+++ b/test_conformance/api/test_sub_group_dispatch.cpp
@@ -108,7 +108,11 @@ REGISTER_TEST_VERSION(sub_group_dispatch, Version(2, 1))
                             nullptr);
     test_error(error, "clGetDeviceInfo failed");
 
-    max_local = max_work_item_sizes[0];
+    error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
+                                     sizeof(max_local), &max_local, nullptr);
+    test_error(error, "clGetKernelWorkGroupInfo failed");
+
+    max_local = std::min(max_local, max_work_item_sizes[0]);
 
     error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
                             (void *)&platform, NULL);
diff --git a/test_conformance/profiling/execute_multipass.cpp b/test_conformance/profiling/execute_multipass.cpp
index 7d654ca5..7a711e6f 100644
--- a/test_conformance/profiling/execute_multipass.cpp
+++ b/test_conformance/profiling/execute_multipass.cpp
@@ -107,21 +107,6 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
     threads[1] = h;
     threads[2] = d;
 
-    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                          3 * sizeof(size_t), (size_t *)localThreads, NULL);
-    test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed");
-    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
-                          &maxWorkgroupSize, NULL);
-    test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
-
-    localThreads[0] =
-        std::min({ localThreads[0], threads[0], maxWorkgroupSize });
-    localThreads[1] = std::min(
-        { localThreads[1], threads[1], maxWorkgroupSize / localThreads[0] });
-    localThreads[2] =
-        std::min({ localThreads[2], threads[2],
-                   maxWorkgroupSize / (localThreads[0] * localThreads[1]) });
-
     clSamplerWrapper sampler = clCreateSampler(
         context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
     test_error(err, "clCreateSampler failed");
@@ -143,6 +128,22 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
                                       &read3d_kernel_code, "read3d");
     test_error(err, "create_single_kernel_helper failed");
 
+    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+                          3 * sizeof(size_t), (size_t *)localThreads, NULL);
+    test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed");
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
+                                   sizeof(size_t), &maxWorkgroupSize, NULL);
+    test_error(err,
+               "clGetKernelWorkGroupInfo(CL_KERNEL_WORK_GROUP_SIZE) failed");
+
+    localThreads[0] =
+        std::min({ localThreads[0], threads[0], maxWorkgroupSize });
+    localThreads[1] = std::min(
+        { localThreads[1], threads[1], maxWorkgroupSize / localThreads[0] });
+    localThreads[2] =
+        std::min({ localThreads[2], threads[2],
+                   maxWorkgroupSize / (localThreads[0] * localThreads[1]) });
+
     // create kernel args object and set arg values.
     // set the args values
     err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]);