From 0fd00e02eea52b9e97ddc98a5cd445a0edbd4c96 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Thu, 2 Mar 2023 10:16:32 +0100 Subject: [PATCH] Out-of-order command-queue test cases for cl_khr_command_buffer (#1596) * Initial commit for out-of-order test cases (#1369, p.2): https://github.com/KhronosGroup/OpenCL-CTS/issues/1369 * Added support for simultaneous test of out-of-order command buffers and queues (#1369, p.2.1): https://github.com/KhronosGroup/OpenCL-CTS/issues/1369 * Cosmetic fixes (#1369, p.2): https://github.com/KhronosGroup/OpenCL-CTS/issues/1369 * Corrected two bugs related to simultaneous test (#1369, p.2.1): https://github.com/KhronosGroup/OpenCL-CTS/issues/1369 * Added additional event to synchronize symultaneous passes, cosmetic fixes (#1369, p.2.1): https://github.com/KhronosGroup/OpenCL-CTS/issues/1369 * Cosmetic fix due to consistency concerns (#1369, p.2): https://github.com/KhronosGroup/OpenCL-CTS/issues/1369 * Fix for prev commit * Added corrections related to changed order of Skip/SetUp methods (issue #1369, 2.0 out-of-order) * Reverted formating of unrelated header * Reverted formating of unrelated header, correction * Reordered initialization of attributes. --- .../cl_khr_command_buffer/CMakeLists.txt | 1 + .../basic_command_buffer.cpp | 87 ----- .../command_buffer_out_of_order.cpp | 352 ++++++++++++++++++ .../extensions/cl_khr_command_buffer/main.cpp | 1 + .../extensions/cl_khr_command_buffer/procs.h | 4 + 5 files changed, 358 insertions(+), 87 deletions(-) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/command_buffer_out_of_order.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt index 44cd1006..097a197c 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_COMMAND_BUFFER) set(${MODULE_NAME}_SOURCES main.cpp basic_command_buffer.cpp + command_buffer_out_of_order.cpp command_buffer_profiling.cpp command_buffer_queue_substitution.cpp command_buffer_test_fill.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp index 59666563..4a892bb3 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp @@ -443,87 +443,6 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest } }; -// Test sync-points with an out-of-order command-buffer -struct OutOfOrderTest : public BasicCommandBufferTest -{ - using BasicCommandBufferTest::BasicCommandBufferTest; - OutOfOrderTest(cl_device_id device, cl_context context, - cl_command_queue queue) - : BasicCommandBufferTest(device, context, queue), - out_of_order_queue(nullptr), out_of_order_command_buffer(this), - event(nullptr) - {} - - cl_int Run() override - { - cl_sync_point_khr sync_points[2]; - - const cl_int pattern = 42; - cl_int error = - clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem, - &pattern, sizeof(cl_int), 0, data_size(), 0, - nullptr, &sync_points[0], nullptr); - test_error(error, "clCommandFillBufferKHR failed"); - - const cl_int overwritten_pattern = 0xACDC; - error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, - out_mem, &overwritten_pattern, - sizeof(cl_int), 0, data_size(), 0, - nullptr, &sync_points[1], nullptr); - test_error(error, "clCommandFillBufferKHR failed"); - - error = clCommandNDRangeKernelKHR( - out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, - &num_elements, nullptr, 2, sync_points, nullptr, nullptr); - test_error(error, "clCommandNDRangeKernelKHR failed"); - - error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); - test_error(error, "clFinalizeCommandBufferKHR failed"); - - error = clEnqueueCommandBufferKHR( - 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); - test_error(error, "clEnqueueCommandBufferKHR failed"); - - std::vector output_data(num_elements); - error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, - data_size(), output_data.data(), 1, &event, - nullptr); - test_error(error, "clEnqueueReadBuffer failed"); - - for (size_t i = 0; i < num_elements; i++) - { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); - } - - return CL_SUCCESS; - } - - cl_int SetUp(int elements) override - { - cl_int error = BasicCommandBufferTest::SetUp(elements); - test_error(error, "BasicCommandBufferTest::SetUp failed"); - - out_of_order_queue = clCreateCommandQueue( - context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); - test_error(error, "Unable to create command queue to test with"); - - out_of_order_command_buffer = - clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error); - test_error(error, "clCreateCommandBufferKHR failed"); - - return CL_SUCCESS; - } - - bool Skip() override - { - return BasicCommandBufferTest::Skip() || !out_of_order_support; - } - - clCommandQueueWrapper out_of_order_queue; - clCommandBufferWrapper out_of_order_command_buffer; - clEventWrapper event; -}; - } // anonymous namespace int test_single_ndrange(cl_device_id device, cl_context context, @@ -559,9 +478,3 @@ int test_user_events(cl_device_id device, cl_context context, { return MakeAndRunTest(device, context, queue, num_elements); } - -int test_out_of_order(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return MakeAndRunTest(device, context, queue, num_elements); -} diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_out_of_order.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_out_of_order.cpp new file mode 100644 index 00000000..9e142bf2 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_out_of_order.cpp @@ -0,0 +1,352 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "basic_command_buffer.h" +#include "procs.h" + +#include + +namespace { + +//////////////////////////////////////////////////////////////////////////////// +// out-of-order tests for cl_khr_command_buffer which handles below cases: +// -test case for out-of-order command-buffer +// -test an out-of-order command-buffer with simultaneous use + +template +struct OutOfOrderTest : public BasicCommandBufferTest +{ + OutOfOrderTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicCommandBufferTest(device, context, queue), + out_of_order_queue(nullptr), out_of_order_command_buffer(this), + user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr), + program_fill(nullptr) + { + simultaneous_use_requested = simultaneous_request; + if (simultaneous_request) buffer_size_multiplier = 2; + } + + //-------------------------------------------------------------------------- + cl_int SetUpKernel() override + { + // if device doesn't support simultaneous use which was requested + // we can skip creation of OCL resources + if (simultaneous_use_requested && !simultaneous_use_support) + return CL_SUCCESS; + + cl_int error = BasicCommandBufferTest::SetUpKernel(); + test_error(error, "BasicCommandBufferTest::SetUpKernel failed"); + + // create additional kernel to properly prepare output buffer for test + const char* kernel_str = + R"( + __kernel void fill(int pattern, __global int* out, __global int* offset) + { + size_t id = get_global_id(0); + size_t ind = offset[0] + id; + out[ind] = pattern; + })"; + + error = create_single_kernel_helper_create_program( + context, &program_fill, 1, &kernel_str); + test_error(error, "Failed to create program with source"); + + error = + clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr); + test_error(error, "Failed to build program"); + + kernel_fill = clCreateKernel(program_fill, "fill", &error); + test_error(error, "Failed to create copy kernel"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int SetUpKernelArgs() override + { + // if device doesn't support simultaneous use which was requested + // we can skip creation of OCL resources + if (simultaneous_use_requested && !simultaneous_use_support) + return CL_SUCCESS; + + cl_int error = BasicCommandBufferTest::SetUpKernelArgs(); + test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed"); + + error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int), + &overwritten_pattern); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem); + test_error(error, "clSetKernelArg failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int SetUp(int elements) override + { + cl_int error = BasicCommandBufferTest::SetUp(elements); + test_error(error, "BasicCommandBufferTest::SetUp failed"); + + out_of_order_queue = clCreateCommandQueue( + context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); + test_error(error, "Unable to create command queue to test with"); + + cl_command_buffer_properties_khr properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, 0, 0 + }; + + if (simultaneous_use_requested && simultaneous_use_support) + properties[1] = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR; + + out_of_order_command_buffer = clCreateCommandBufferKHR( + 1, &out_of_order_queue, properties, &error); + test_error(error, "clCreateCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + bool Skip() override + { + if (BasicCommandBufferTest::Skip()) return true; + + if (!out_of_order_support + || (simultaneous_use_requested && !simultaneous_use_support)) + return true; + + return false; + } + + //-------------------------------------------------------------------------- + cl_int Run() override + { + cl_int error = CL_SUCCESS; + + if (simultaneous_use_support) + { + // enqueue simultaneous command-buffers with out-of-order calls + error = RunSimultaneous(); + test_error(error, "RunSimultaneous failed"); + } + else + { + // enqueue single command-buffer with out-of-order calls + error = RunSingle(); + test_error(error, "RunSingle failed"); + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RecordCommandBuffer() + { + cl_sync_point_khr sync_points[2]; + const cl_int pattern = pattern_pri; + cl_int error = + clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem, + &pattern, sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, + out_mem, &overwritten_pattern, + sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[1], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RunSingle() + { + cl_int error = RecordCommandBuffer(); + test_error(error, "RecordCommandBuffer failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &user_event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, + &user_event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i); + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RecordSimultaneousCommandBuffer() const + { + cl_sync_point_khr sync_points[2]; + // for both simultaneous passes this call will fill entire in_mem buffer + cl_int error = clCommandFillBufferKHR( + out_of_order_command_buffer, nullptr, in_mem, &pattern_pri, + sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr, + &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + // to avoid overwriting the entire result buffer instead of filling only + // relevant part this additional kernel was introduced + error = clCommandNDRangeKernelKHR(out_of_order_command_buffer, nullptr, + nullptr, kernel_fill, 1, nullptr, + &num_elements, nullptr, 0, nullptr, + &sync_points[1], nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + struct SimulPassData + { + cl_int offset; + std::vector output_buffer; + // 0:user event, 1:offset-buffer fill event, 2:kernel done event + clEventWrapper wait_events[3]; + }; + + //-------------------------------------------------------------------------- + cl_int EnqueueSimultaneousPass(SimulPassData& pd) + { + cl_int error = CL_SUCCESS; + if (!user_event) + { + user_event = clCreateUserEvent(context, &error); + test_error(error, "clCreateUserEvent failed"); + } + + pd.wait_events[0] = user_event; + + // filling offset buffer must wait for previous pass completeness + error = clEnqueueFillBuffer( + out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0, + sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0), + (wait_pass_event != nullptr ? &wait_pass_event : nullptr), + &pd.wait_events[1]); + test_error(error, "clEnqueueFillBuffer failed"); + + // command buffer execution must wait for two wait-events + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0], + &pd.wait_events[2]); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE, + pd.offset * sizeof(cl_int), data_size(), + pd.output_buffer.data(), 1, + &pd.wait_events[2], nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RunSimultaneous() + { + cl_int error = RecordSimultaneousCommandBuffer(); + test_error(error, "RecordSimultaneousCommandBuffer failed"); + + cl_int offset = static_cast(num_elements); + + std::vector simul_passes = { + { 0, std::vector(num_elements) }, + { offset, std::vector(num_elements) } + }; + + for (auto&& pass : simul_passes) + { + error = EnqueueSimultaneousPass(pass); + test_error(error, "EnqueueSimultaneousPass failed"); + + wait_pass_event = pass.wait_events[2]; + } + + error = clSetUserEventStatus(user_event, CL_COMPLETE); + test_error(error, "clSetUserEventStatus failed"); + + error = clFinish(out_of_order_queue); + test_error(error, "clFinish failed"); + + // verify the result buffers + for (auto&& pass : simul_passes) + { + auto& res_data = pass.output_buffer; + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i); + } + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + clCommandQueueWrapper out_of_order_queue; + clCommandBufferWrapper out_of_order_command_buffer; + + clEventWrapper user_event; + clEventWrapper wait_pass_event; + + clKernelWrapper kernel_fill; + clProgramWrapper program_fill; + + const cl_int overwritten_pattern = 0xACDC; + const cl_int pattern_pri = 42; +}; + +} // anonymous namespace + +int test_out_of_order(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest>(device, context, queue, + num_elements); +} + +int test_simultaneous_out_of_order(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest>(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp index 00663275..d1bb896e 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp @@ -21,6 +21,7 @@ test_definition test_list[] = { ADD_TEST(single_ndrange), ADD_TEST(explicit_flush), ADD_TEST(user_events), ADD_TEST(out_of_order), + ADD_TEST(simultaneous_out_of_order), ADD_TEST(basic_profiling), ADD_TEST(simultaneous_profiling), ADD_TEST(queue_substitution), diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h index 3d1be569..f43bdb59 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h @@ -31,6 +31,10 @@ extern int test_user_events(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_out_of_order(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_simultaneous_out_of_order(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); extern int test_basic_profiling(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_simultaneous_profiling(cl_device_id device, cl_context context,