// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP #define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP #include #include #include #include #include // Common for all OpenCL C++ tests #include "../common.hpp" namespace test_sub_group_barrier { enum class barrier_type { local, global }; struct test_options { barrier_type barrier; size_t max_count; size_t num_tests; }; const std::string source_common = R"( // Circular shift of sub-group local ids size_t get_shifted_local_id(int sub_group_local_id_delta) { const int sub_group_size = (int)get_sub_group_size(); return (get_local_id(0) - get_sub_group_local_id()) + (((int)get_sub_group_local_id() + sub_group_local_id_delta) % sub_group_size + sub_group_size) % sub_group_size; } // Get global ids from shifted local ids size_t get_shifted_global_id(int sub_group_local_id_delta) { return get_group_id(0) * get_enqueued_local_size(0) + get_shifted_local_id(sub_group_local_id_delta); } )"; // ----------------------------------------------------------------------------------- // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ // ----------------------------------------------------------------------------------- #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) std::string generate_source(test_options options) { std::stringstream s; s << R"( #pragma OPENCL EXTENSION cl_khr_subgroups : enable )"; s << source_common; if (options.barrier == barrier_type::global) { s << R"( kernel void test(const int iter_lo, const int iter_hi, global long *output) { const size_t gid = get_shifted_global_id(0); output[gid] = gid; sub_group_barrier(CLK_GLOBAL_MEM_FENCE); for (int i = iter_lo; i < iter_hi; i++) { const size_t other_gid = get_shifted_global_id(i); output[other_gid] += other_gid; sub_group_barrier(CLK_GLOBAL_MEM_FENCE); output[gid] += gid; sub_group_barrier(CLK_GLOBAL_MEM_FENCE); } } )"; } else if (options.barrier == barrier_type::local) { s << R"( kernel void test(const int iter_lo, const int iter_hi, global long *output, local long *values) { const size_t gid = get_shifted_global_id(0); const size_t lid = get_shifted_local_id(0); values[lid] = gid; sub_group_barrier(CLK_LOCAL_MEM_FENCE); for (int i = iter_lo; i < iter_hi; i++) { const size_t other_lid = get_shifted_local_id(i); const size_t other_gid = get_shifted_global_id(i); values[other_lid] += other_gid; sub_group_barrier(CLK_LOCAL_MEM_FENCE); values[lid] += gid; sub_group_barrier(CLK_LOCAL_MEM_FENCE); } output[gid] = values[lid]; } )"; } return s.str(); } #else std::string generate_source(test_options options) { std::stringstream s; s << R"( #include #include #include using namespace cl; )"; s << source_common; if (options.barrier == barrier_type::global) { s << R"( kernel void test(const int iter_lo, const int iter_hi, global_ptr output) { const size_t gid = get_shifted_global_id(0); output[gid] = gid; sub_group_barrier(mem_fence::global); for (int i = iter_lo; i < iter_hi; i++) { const size_t other_gid = get_shifted_global_id(i); output[other_gid] += other_gid; sub_group_barrier(mem_fence::global); output[gid] += gid; sub_group_barrier(mem_fence::global); } } )"; } else if (options.barrier == barrier_type::local) { s << R"( kernel void test(const int iter_lo, const int iter_hi, global_ptr output, local_ptr values) { const size_t gid = get_shifted_global_id(0); const size_t lid = get_shifted_local_id(0); values[lid] = gid; sub_group_barrier(mem_fence::local); for (int i = iter_lo; i < iter_hi; i++) { const size_t other_lid = get_shifted_local_id(i); const size_t other_gid = get_shifted_global_id(i); values[other_lid] += other_gid; sub_group_barrier(mem_fence::local); values[lid] += gid; sub_group_barrier(mem_fence::local); } output[gid] = values[lid]; } )"; } return s.str(); } #endif int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) { int error = CL_SUCCESS; #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) if (!is_extension_available(device, "cl_khr_subgroups")) { log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n"); return CL_SUCCESS; } #endif cl_program program; cl_kernel kernel; std::string kernel_name = "test"; std::string source = generate_source(options); // ----------------------------------------------------------------------------------- // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ // ----------------------------------------------------------------------------------- // Only OpenCL C++ to SPIR-V compilation #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) error = create_opencl_kernel( context, &program, &kernel, source, kernel_name ); RETURN_ON_ERROR(error) return error; // Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) #elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) error = create_opencl_kernel( context, &program, &kernel, source, kernel_name, "-cl-std=CL2.0", false ); RETURN_ON_ERROR(error) // Normal run #else error = create_opencl_kernel( context, &program, &kernel, source, kernel_name ); RETURN_ON_ERROR(error) #endif size_t max_work_group_size; error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL); RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") if (options.barrier == barrier_type::local) { cl_ulong kernel_local_mem_size; error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL); RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") cl_ulong device_local_mem_size; error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(device_local_mem_size), &device_local_mem_size, NULL); RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") max_work_group_size = (std::min)(max_work_group_size, (device_local_mem_size - kernel_local_mem_size) / sizeof(cl_long)); } std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution global_size_dis(1, options.max_count); std::uniform_int_distribution local_size_dis(1, max_work_group_size); std::uniform_int_distribution iter_dis(0, 20); for (size_t test = 0; test < options.num_tests; test++) { const size_t global_size = global_size_dis(gen); const size_t local_size = local_size_dis(gen); const size_t count = global_size; const int iter_lo = -iter_dis(gen); const int iter_hi = +iter_dis(gen); cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long) * count, NULL, &error); RETURN_ON_CL_ERROR(error, "clCreateBuffer") error = clSetKernelArg(kernel, 0, sizeof(iter_lo), &iter_lo); RETURN_ON_CL_ERROR(error, "clSetKernelArg") error = clSetKernelArg(kernel, 1, sizeof(iter_hi), &iter_hi); RETURN_ON_CL_ERROR(error, "clSetKernelArg") error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer); RETURN_ON_CL_ERROR(error, "clSetKernelArg") if (options.barrier == barrier_type::local) { error = clSetKernelArg(kernel, 3, sizeof(cl_long) * local_size, NULL); RETURN_ON_CL_ERROR(error, "clSetKernelArg") } error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") std::vector output(count); error = clEnqueueReadBuffer( queue, output_buffer, CL_TRUE, 0, sizeof(cl_long) * count, static_cast(output.data()), 0, NULL, NULL ); RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") error = clReleaseMemObject(output_buffer); RETURN_ON_CL_ERROR(error, "clReleaseMemObject") for (size_t gid = 0; gid < count; gid++) { const long value = output[gid]; const long expected = gid + 2 * gid * (iter_hi - iter_lo); if (value != expected) { RETURN_ON_ERROR_MSG(-1, "Element %lu has incorrect value. Expected: %ld, got: %ld", gid, expected, value ); } } } clReleaseKernel(kernel); clReleaseProgram(program); return error; } AUTO_TEST_CASE(test_sub_group_barrier_global) (cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { test_options options; options.barrier = barrier_type::global; options.num_tests = 1000; options.max_count = num_elements; return test(device, context, queue, options); } AUTO_TEST_CASE(test_sub_group_barrier_local) (cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { test_options options; options.barrier = barrier_type::local; options.num_tests = 1000; options.max_count = num_elements; return test(device, context, queue, options); } } // namespace #endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP