From ae1a712e3ed14f87857575987389dff26bb74c47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?=
Date: Tue, 11 Jul 2023 19:43:29 +0200
Subject: [PATCH] Add out of order tests for
cl_khr_command_buffer_mutable_dispatch. (#1746)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* Add out of order tests for cl_khr_command_buffer_mutable_dispatch.
Signed-off-by: Paweł Jastrzębski
* Replace CL_KERNEL_EXEC_INFO_SVM_PTRS with cl_mutable_dispatch_arg_khr command-buffer mutable buffer update.
CL_KERNEL_EXEC_INFO_SVM_PTRS limits the test to devices which support SVM.
Updating arg_list with a cl_mutable_dispatch_arg_khr struct is one of the
best supported, and also easiest to verify, configurations to change.
Signed-off-by: Paweł Jastrzębski
* Fix skip condition.
Signed-off-by: Paweł Jastrzębski
* Fix review changes.
Changes made:
- Fix skip condition
- Add event
- Add memory verification
Signed-off-by: Paweł Jastrzębski
* Fix review comments.
Signed-off-by: Paweł Jastrzębski
---------
Signed-off-by: Paweł Jastrzębski
---
.../CMakeLists.txt | 1 +
.../main.cpp | 2 +
.../mutable_command_out_of_order.cpp | 454 ++++++++++++++++++
.../procs.h | 7 +
4 files changed, 464 insertions(+)
create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
index 1df528ee..edf12c8e 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
@@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH)
set(${MODULE_NAME}_SOURCES
main.cpp
mutable_command_info.cpp
+ mutable_command_out_of_order.cpp
mutable_command_global_size.cpp
mutable_command_local_size.cpp
mutable_command_global_offset.cpp
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp
index 7e3ef52b..07c9550d 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp
@@ -26,6 +26,8 @@ test_definition test_list[] = {
ADD_TEST(mutable_command_info_global_work_offset),
ADD_TEST(mutable_command_info_local_work_size),
ADD_TEST(mutable_command_info_global_work_size),
+ ADD_TEST(mutable_dispatch_out_of_order),
+ ADD_TEST(mutable_dispatch_simultaneous_out_of_order),
ADD_TEST(mutable_dispatch_global_size),
ADD_TEST(mutable_dispatch_local_size),
ADD_TEST(mutable_dispatch_global_offset),
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp
new file mode 100644
index 00000000..d507dadf
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp
@@ -0,0 +1,454 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include
+#include
+#include "mutable_command_basic.h"
+
+#include
+#include
+////////////////////////////////////////////////////////////////////////////////
+// mutable dispatch tests which handle following cases:
+// - simultaneous use
+// - cross-queue simultaneous-use
+
+namespace {
+
+template
+struct OutOfOrderTest : public BasicMutableCommandBufferTest
+{
+ OutOfOrderTest(cl_device_id device, cl_context context,
+ cl_command_queue queue)
+ : BasicMutableCommandBufferTest(device, context, queue),
+ out_of_order_queue(nullptr), out_of_order_command_buffer(this),
+ user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr),
+ program_fill(nullptr)
+ {
+ simultaneous_use_requested = simultaneous_request;
+ if (simultaneous_request) buffer_size_multiplier = 2;
+ }
+
+ //--------------------------------------------------------------------------
+ cl_int SetUpKernel() override
+ {
+ cl_int error = BasicMutableCommandBufferTest::SetUpKernel();
+ test_error(error, "BasicMutableCommandBufferTest::SetUpKernel failed");
+
+ // create additional kernel to properly prepare output buffer for test
+ const char* kernel_str =
+ R"(
+ __kernel void fill(int pattern, __global int* out, __global int*
+ offset)
+ {
+ size_t id = get_global_id(0);
+ size_t ind = offset[0] + id ;
+ out[ind] = pattern;
+ })";
+
+ error = create_single_kernel_helper_create_program(
+ context, &program_fill, 1, &kernel_str);
+ test_error(error, "Failed to create program with source");
+
+ error =
+ clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr);
+ test_error(error, "Failed to build program");
+
+ kernel_fill = clCreateKernel(program_fill, "fill", &error);
+ test_error(error, "Failed to create copy kernel");
+
+ return CL_SUCCESS;
+ }
+
+ //--------------------------------------------------------------------------
+ cl_int SetUpKernelArgs() override
+ {
+ cl_int error = BasicMutableCommandBufferTest::SetUpKernelArgs();
+ test_error(error,
+ "BasicMutableCommandBufferTest::SetUpKernelArgs failed");
+
+ error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int),
+ &overwritten_pattern);
+ test_error(error, "clSetKernelArg failed");
+
+ error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem);
+ test_error(error, "clSetKernelArg failed");
+
+ error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem);
+ test_error(error, "clSetKernelArg failed");
+
+ return CL_SUCCESS;
+ }
+
+ //--------------------------------------------------------------------------
+ cl_int SetUp(int elements) override
+ {
+ cl_int error = BasicMutableCommandBufferTest::SetUp(elements);
+ test_error(error, "BasicMutableCommandBufferTest::SetUp failed");
+
+ error = SetUpKernel();
+ test_error(error, "SetUpKernel failed");
+
+ out_of_order_queue = clCreateCommandQueue(
+ context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
+ test_error(error, "Unable to create command queue to test with");
+
+ cl_command_buffer_properties_khr properties[3] = {
+ CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_MUTABLE_KHR, 0
+ };
+
+ out_of_order_command_buffer = clCreateCommandBufferKHR(
+ 1, &out_of_order_queue, properties, &error);
+ test_error(error, "clCreateCommandBufferKHR failed");
+
+ return CL_SUCCESS;
+ }
+
+ //--------------------------------------------------------------------------
+ bool Skip() override
+ {
+ cl_mutable_dispatch_fields_khr mutable_capabilities;
+
+ bool mutable_support =
+ !clGetDeviceInfo(
+ device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+ sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+ && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
+
+
+ return !out_of_order_support
+ || (simultaneous_use_requested && !simultaneous_use_support)
+ || !mutable_support || BasicMutableCommandBufferTest::Skip();
+ }
+
+ //--------------------------------------------------------------------------
+ cl_int Run() override
+ {
+ cl_int error = CL_SUCCESS;
+
+ if (simultaneous_use_support)
+ {
+ // enqueue simultaneous command-buffers with out-of-order calls
+ error = RunSimultaneous();
+ test_error(error, "RunSimultaneous failed");
+ }
+ else
+ {
+ // enqueue single command-buffer with out-of-order calls
+ error = RunSingle();
+ test_error(error, "RunSingle failed");
+ }
+
+ return CL_SUCCESS;
+ }
+
+ //--------------------------------------------------------------------------
+ cl_int RecordCommandBuffer()
+ {
+ cl_sync_point_khr sync_points[2];
+ const cl_int pattern = pattern_pri;
+ cl_int error =
+ clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem,
+ &pattern, sizeof(cl_int), 0, data_size(), 0,
+ nullptr, &sync_points[0], nullptr);
+ test_error(error, "clCommandFillBufferKHR failed");
+
+ error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
+ out_mem, &overwritten_pattern,
+ sizeof(cl_int), 0, data_size(), 0,
+ nullptr, &sync_points[1], nullptr);
+ test_error(error, "clCommandFillBufferKHR failed");
+
+ error = clCommandNDRangeKernelKHR(
+ out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+ &num_elements, nullptr, 2, sync_points, nullptr, &command);
+ test_error(error, "clCommandNDRangeKernelKHR failed");
+
+ error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
+ test_error(error, "clFinalizeCommandBufferKHR failed");
+
+ return CL_SUCCESS;
+ }
+
+ //--------------------------------------------------------------------------
+ cl_int RunSingle()
+ {
+ cl_int error;
+
+ error = RecordCommandBuffer();
+ test_error(error, "RecordCommandBuffer failed");
+
+ error = clEnqueueCommandBufferKHR(
+ 0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector output_data(num_elements);
+ error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
+ data_size(), output_data.data(), 1,
+ &single_event, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+ }
+
+ clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+ sizeof(cl_int) * num_elements
+ * buffer_size_multiplier,
+ nullptr, &error);
+ test_error(error, "clCreateBuffer failed");
+
+ cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
+ &new_out_mem };
+ cl_mutable_dispatch_arg_khr args[] = { arg_1 };
+
+ cl_mutable_dispatch_config_khr dispatch_config{
+ CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+ nullptr,
+ command,
+ 1 /* num_args */,
+ 0 /* num_svm_arg */,
+ 0 /* num_exec_infos */,
+ 0 /* work_dim - 0 means no change to dimensions */,
+ args /* arg_list */,
+ nullptr /* arg_svm_list - nullptr means no change*/,
+ nullptr /* exec_info_list */,
+ nullptr /* global_work_offset */,
+ nullptr /* global_work_size */,
+ nullptr /* local_work_size */
+ };
+ cl_mutable_base_config_khr mutable_config{
+ CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+ &dispatch_config
+ };
+
+ error = clUpdateMutableCommandsKHR(out_of_order_command_buffer,
+ &mutable_config);
+ test_error(error, "clUpdateMutableCommandsKHR failed");
+
+ error = clEnqueueCommandBufferKHR(
+ 0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_TRUE, 0,
+ data_size(), output_data.data(), 1,
+ &single_event, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+ }
+
+ return CL_SUCCESS;
+ }
+
+ //--------------------------------------------------------------------------
+ cl_int RecordSimultaneousCommandBuffer()
+ {
+ cl_sync_point_khr sync_points[2];
+ // for both simultaneous passes this call will fill entire in_mem buffer
+ cl_int error = clCommandFillBufferKHR(
+ out_of_order_command_buffer, nullptr, in_mem, &pattern_pri,
+ sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
+ &sync_points[0], nullptr);
+ test_error(error, "clCommandFillBufferKHR failed");
+
+ // to avoid overwriting the entire result buffer instead of filling
+ // only relevant part this additional kernel was introduced
+
+ error = clCommandNDRangeKernelKHR(out_of_order_command_buffer, nullptr,
+ nullptr, kernel_fill, 1, nullptr,
+ &num_elements, nullptr, 0, nullptr,
+ &sync_points[1], &command);
+ test_error(error, "clCommandNDRangeKernelKHR failed");
+
+ error = clCommandNDRangeKernelKHR(
+ out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+ &num_elements, nullptr, 2, sync_points, nullptr, &command);
+ test_error(error, "clCommandNDRangeKernelKHR failed");
+
+ error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
+ test_error(error, "clFinalizeCommandBufferKHR failed");
+
+ return CL_SUCCESS;
+ }
+
+ //--------------------------------------------------------------------------
+ struct SimulPassData
+ {
+ cl_int offset;
+ std::vector output_buffer;
+ // 0:user event, 1:offset-buffer fill event, 2:kernel done event
+ clEventWrapper wait_events[3];
+ };
+
+ //--------------------------------------------------------------------------
+ cl_int EnqueueSimultaneousPass(SimulPassData& pd)
+ {
+ cl_int error = CL_SUCCESS;
+ if (!user_event)
+ {
+ user_event = clCreateUserEvent(context, &error);
+ test_error(error, "clCreateUserEvent failed");
+ }
+
+ pd.wait_events[0] = user_event;
+
+ // filling offset buffer must wait for previous pass completeness
+ error = clEnqueueFillBuffer(
+ out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0,
+ sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0),
+ (wait_pass_event != nullptr ? &wait_pass_event : nullptr),
+ &pd.wait_events[1]);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ // command buffer execution must wait for two wait-events
+ error = clEnqueueCommandBufferKHR(
+ 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0],
+ &pd.wait_events[2]);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE,
+ pd.offset * sizeof(cl_int), data_size(),
+ pd.output_buffer.data(), 1,
+ &pd.wait_events[2], nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+ sizeof(cl_int) * num_elements
+ * buffer_size_multiplier,
+ nullptr, &error);
+ test_error(error, "clCreateBuffer failed");
+
+ cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
+ &new_out_mem };
+ cl_mutable_dispatch_arg_khr args[] = { arg_1 };
+
+ cl_mutable_dispatch_config_khr dispatch_config{
+ CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+ nullptr,
+ command,
+ 1 /* num_args */,
+ 0 /* num_svm_arg */,
+ 0 /* num_exec_infos */,
+ 0 /* work_dim - 0 means no change to dimensions */,
+ args /* arg_list */,
+ nullptr /* arg_svm_list - nullptr means no change*/,
+ nullptr /* exec_info_list */,
+ nullptr /* global_work_offset */,
+ nullptr /* global_work_size */,
+ nullptr /* local_work_size */
+ };
+ cl_mutable_base_config_khr mutable_config{
+ CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+ &dispatch_config
+ };
+
+ error = clUpdateMutableCommandsKHR(out_of_order_command_buffer,
+ &mutable_config);
+ test_error(error, "clUpdateMutableCommandsKHR failed");
+
+ // command buffer execution must wait for two wait-events
+ error = clEnqueueCommandBufferKHR(
+ 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0],
+ &pd.wait_events[2]);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_FALSE,
+ pd.offset * sizeof(cl_int), data_size(),
+ pd.output_buffer.data(), 1,
+ &pd.wait_events[2], nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ return CL_SUCCESS;
+ }
+
+ //--------------------------------------------------------------------------
+ cl_int RunSimultaneous()
+ {
+ cl_int error = RecordSimultaneousCommandBuffer();
+ test_error(error, "RecordSimultaneousCommandBuffer failed");
+
+ cl_int offset = static_cast(num_elements);
+
+ std::vector simul_passes = {
+ { 0, std::vector(num_elements) },
+ { offset, std::vector(num_elements) }
+ };
+
+ for (auto&& pass : simul_passes)
+ {
+ error = EnqueueSimultaneousPass(pass);
+ test_error(error, "EnqueueSimultaneousPass failed");
+
+ wait_pass_event = pass.wait_events[2];
+ }
+
+ error = clSetUserEventStatus(user_event, CL_COMPLETE);
+ test_error(error, "clSetUserEventStatus failed");
+
+ error = clFinish(out_of_order_queue);
+ test_error(error, "clFinish failed");
+
+ // verify the result buffers
+ for (auto&& pass : simul_passes)
+ {
+ auto& res_data = pass.output_buffer;
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i);
+ }
+ }
+
+ return CL_SUCCESS;
+ }
+
+ //--------------------------------------------------------------------------
+ clCommandQueueWrapper out_of_order_queue;
+ clCommandBufferWrapper out_of_order_command_buffer;
+
+ clEventWrapper user_event;
+ clEventWrapper single_event;
+ clEventWrapper wait_pass_event;
+
+ clKernelWrapper kernel_fill;
+ clProgramWrapper program_fill;
+
+ const size_t test_global_work_size = 3 * sizeof(cl_int);
+ cl_mutable_command_khr command = nullptr;
+
+ const cl_int overwritten_pattern = 0xACDC;
+ const cl_int pattern_pri = 42;
+};
+
+} // anonymous namespace
+
+int test_mutable_dispatch_out_of_order(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return MakeAndRunTest>(device, context, queue,
+ num_elements);
+}
+
+int test_mutable_dispatch_simultaneous_out_of_order(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return MakeAndRunTest>(device, context, queue,
+ num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
index 5e1aa8e5..3558401b 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
@@ -59,6 +59,13 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device,
cl_context context,
cl_command_queue queue,
int num_elements);
+extern int test_mutable_dispatch_out_of_order(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_simultaneous_out_of_order(
+ cl_device_id device, cl_context context, cl_command_queue queue,
+ int num_elements);
extern int test_mutable_dispatch_global_size(cl_device_id device,
cl_context context,
cl_command_queue queue,