diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index 1df528ee..edf12c8e 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH) set(${MODULE_NAME}_SOURCES main.cpp mutable_command_info.cpp + mutable_command_out_of_order.cpp mutable_command_global_size.cpp mutable_command_local_size.cpp mutable_command_global_offset.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp index 7e3ef52b..07c9550d 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp @@ -26,6 +26,8 @@ test_definition test_list[] = { ADD_TEST(mutable_command_info_global_work_offset), ADD_TEST(mutable_command_info_local_work_size), ADD_TEST(mutable_command_info_global_work_size), + ADD_TEST(mutable_dispatch_out_of_order), + ADD_TEST(mutable_dispatch_simultaneous_out_of_order), ADD_TEST(mutable_dispatch_global_size), ADD_TEST(mutable_dispatch_local_size), ADD_TEST(mutable_dispatch_global_offset), diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp new file mode 100644 index 00000000..d507dadf --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp @@ -0,0 +1,454 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include +#include "mutable_command_basic.h" + +#include +#include +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// - simultaneous use +// - cross-queue simultaneous-use + +namespace { + +template +struct OutOfOrderTest : public BasicMutableCommandBufferTest +{ + OutOfOrderTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue), + out_of_order_queue(nullptr), out_of_order_command_buffer(this), + user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr), + program_fill(nullptr) + { + simultaneous_use_requested = simultaneous_request; + if (simultaneous_request) buffer_size_multiplier = 2; + } + + //-------------------------------------------------------------------------- + cl_int SetUpKernel() override + { + cl_int error = BasicMutableCommandBufferTest::SetUpKernel(); + test_error(error, "BasicMutableCommandBufferTest::SetUpKernel failed"); + + // create additional kernel to properly prepare output buffer for test + const char* kernel_str = + R"( + __kernel void fill(int pattern, __global int* out, __global int* + offset) + { + size_t id = get_global_id(0); + size_t ind = offset[0] + id ; + out[ind] = pattern; + })"; + + error = create_single_kernel_helper_create_program( + context, &program_fill, 1, &kernel_str); + test_error(error, "Failed to create program with source"); + + error = + clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr); + test_error(error, "Failed to build program"); + + kernel_fill = clCreateKernel(program_fill, "fill", &error); + test_error(error, "Failed to create copy kernel"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int SetUpKernelArgs() override + { + cl_int error = BasicMutableCommandBufferTest::SetUpKernelArgs(); + test_error(error, + "BasicMutableCommandBufferTest::SetUpKernelArgs failed"); + + error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int), + &overwritten_pattern); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem); + test_error(error, "clSetKernelArg failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int SetUp(int elements) override + { + cl_int error = BasicMutableCommandBufferTest::SetUp(elements); + test_error(error, "BasicMutableCommandBufferTest::SetUp failed"); + + error = SetUpKernel(); + test_error(error, "SetUpKernel failed"); + + out_of_order_queue = clCreateCommandQueue( + context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); + test_error(error, "Unable to create command queue to test with"); + + cl_command_buffer_properties_khr properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_MUTABLE_KHR, 0 + }; + + out_of_order_command_buffer = clCreateCommandBufferKHR( + 1, &out_of_order_queue, properties, &error); + test_error(error, "clCreateCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + + return !out_of_order_support + || (simultaneous_use_requested && !simultaneous_use_support) + || !mutable_support || BasicMutableCommandBufferTest::Skip(); + } + + //-------------------------------------------------------------------------- + cl_int Run() override + { + cl_int error = CL_SUCCESS; + + if (simultaneous_use_support) + { + // enqueue simultaneous command-buffers with out-of-order calls + error = RunSimultaneous(); + test_error(error, "RunSimultaneous failed"); + } + else + { + // enqueue single command-buffer with out-of-order calls + error = RunSingle(); + test_error(error, "RunSingle failed"); + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RecordCommandBuffer() + { + cl_sync_point_khr sync_points[2]; + const cl_int pattern = pattern_pri; + cl_int error = + clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem, + &pattern, sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, + out_mem, &overwritten_pattern, + sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[1], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RunSingle() + { + cl_int error; + + error = RecordCommandBuffer(); + test_error(error, "RecordCommandBuffer failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, + &single_event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i); + } + + clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(cl_int) * num_elements + * buffer_size_multiplier, + nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), + &new_out_mem }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(out_of_order_command_buffer, + &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, + &single_event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i); + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RecordSimultaneousCommandBuffer() + { + cl_sync_point_khr sync_points[2]; + // for both simultaneous passes this call will fill entire in_mem buffer + cl_int error = clCommandFillBufferKHR( + out_of_order_command_buffer, nullptr, in_mem, &pattern_pri, + sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr, + &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + // to avoid overwriting the entire result buffer instead of filling + // only relevant part this additional kernel was introduced + + error = clCommandNDRangeKernelKHR(out_of_order_command_buffer, nullptr, + nullptr, kernel_fill, 1, nullptr, + &num_elements, nullptr, 0, nullptr, + &sync_points[1], &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + struct SimulPassData + { + cl_int offset; + std::vector output_buffer; + // 0:user event, 1:offset-buffer fill event, 2:kernel done event + clEventWrapper wait_events[3]; + }; + + //-------------------------------------------------------------------------- + cl_int EnqueueSimultaneousPass(SimulPassData& pd) + { + cl_int error = CL_SUCCESS; + if (!user_event) + { + user_event = clCreateUserEvent(context, &error); + test_error(error, "clCreateUserEvent failed"); + } + + pd.wait_events[0] = user_event; + + // filling offset buffer must wait for previous pass completeness + error = clEnqueueFillBuffer( + out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0, + sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0), + (wait_pass_event != nullptr ? &wait_pass_event : nullptr), + &pd.wait_events[1]); + test_error(error, "clEnqueueFillBuffer failed"); + + // command buffer execution must wait for two wait-events + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0], + &pd.wait_events[2]); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE, + pd.offset * sizeof(cl_int), data_size(), + pd.output_buffer.data(), 1, + &pd.wait_events[2], nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(cl_int) * num_elements + * buffer_size_multiplier, + nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), + &new_out_mem }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(out_of_order_command_buffer, + &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + // command buffer execution must wait for two wait-events + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0], + &pd.wait_events[2]); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_FALSE, + pd.offset * sizeof(cl_int), data_size(), + pd.output_buffer.data(), 1, + &pd.wait_events[2], nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RunSimultaneous() + { + cl_int error = RecordSimultaneousCommandBuffer(); + test_error(error, "RecordSimultaneousCommandBuffer failed"); + + cl_int offset = static_cast(num_elements); + + std::vector simul_passes = { + { 0, std::vector(num_elements) }, + { offset, std::vector(num_elements) } + }; + + for (auto&& pass : simul_passes) + { + error = EnqueueSimultaneousPass(pass); + test_error(error, "EnqueueSimultaneousPass failed"); + + wait_pass_event = pass.wait_events[2]; + } + + error = clSetUserEventStatus(user_event, CL_COMPLETE); + test_error(error, "clSetUserEventStatus failed"); + + error = clFinish(out_of_order_queue); + test_error(error, "clFinish failed"); + + // verify the result buffers + for (auto&& pass : simul_passes) + { + auto& res_data = pass.output_buffer; + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i); + } + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + clCommandQueueWrapper out_of_order_queue; + clCommandBufferWrapper out_of_order_command_buffer; + + clEventWrapper user_event; + clEventWrapper single_event; + clEventWrapper wait_pass_event; + + clKernelWrapper kernel_fill; + clProgramWrapper program_fill; + + const size_t test_global_work_size = 3 * sizeof(cl_int); + cl_mutable_command_khr command = nullptr; + + const cl_int overwritten_pattern = 0xACDC; + const cl_int pattern_pri = 42; +}; + +} // anonymous namespace + +int test_mutable_dispatch_out_of_order(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest>(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_simultaneous_out_of_order(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest>(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 5e1aa8e5..3558401b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -59,6 +59,13 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_mutable_dispatch_out_of_order(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_simultaneous_out_of_order( + cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements); extern int test_mutable_dispatch_global_size(cl_device_id device, cl_context context, cl_command_queue queue,