Add out of order tests for cl_khr_command_buffer_mutable_dispatch. (#1746)

* Add out of order tests for cl_khr_command_buffer_mutable_dispatch.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Replace CL_KERNEL_EXEC_INFO_SVM_PTRS with cl_mutable_dispatch_arg_khr command-buffer mutable buffer update.

CL_KERNEL_EXEC_INFO_SVM_PTRS limits the test to devices which support SVM.
Updating arg_list with a cl_mutable_dispatch_arg_khr struct is one of the
best supported, and also easiest to verify, configurations to change.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Fix skip condition.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Fix review changes.

Changes made:
- Fix skip condition
- Add event
- Add memory verification

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Fix review comments.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

---------

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>
This commit is contained in:
Paweł Jastrzębski
2023-07-11 19:43:29 +02:00
committed by GitHub
parent 64130824a8
commit ae1a712e3e
4 changed files with 464 additions and 0 deletions

View File

@@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH)
set(${MODULE_NAME}_SOURCES
main.cpp
mutable_command_info.cpp
mutable_command_out_of_order.cpp
mutable_command_global_size.cpp
mutable_command_local_size.cpp
mutable_command_global_offset.cpp

View File

@@ -26,6 +26,8 @@ test_definition test_list[] = {
ADD_TEST(mutable_command_info_global_work_offset),
ADD_TEST(mutable_command_info_local_work_size),
ADD_TEST(mutable_command_info_global_work_size),
ADD_TEST(mutable_dispatch_out_of_order),
ADD_TEST(mutable_dispatch_simultaneous_out_of_order),
ADD_TEST(mutable_dispatch_global_size),
ADD_TEST(mutable_dispatch_local_size),
ADD_TEST(mutable_dispatch_global_offset),

View File

@@ -0,0 +1,454 @@
//
// Copyright (c) 2022 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <extensionHelpers.h>
#include <vector>
#include "mutable_command_basic.h"
#include <CL/cl.h>
#include <CL/cl_ext.h>
////////////////////////////////////////////////////////////////////////////////
// mutable dispatch tests which handle following cases:
// - simultaneous use
// - cross-queue simultaneous-use
namespace {
template <bool simultaneous_request>
struct OutOfOrderTest : public BasicMutableCommandBufferTest
{
OutOfOrderTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
out_of_order_queue(nullptr), out_of_order_command_buffer(this),
user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr),
program_fill(nullptr)
{
simultaneous_use_requested = simultaneous_request;
if (simultaneous_request) buffer_size_multiplier = 2;
}
//--------------------------------------------------------------------------
cl_int SetUpKernel() override
{
cl_int error = BasicMutableCommandBufferTest::SetUpKernel();
test_error(error, "BasicMutableCommandBufferTest::SetUpKernel failed");
// create additional kernel to properly prepare output buffer for test
const char* kernel_str =
R"(
__kernel void fill(int pattern, __global int* out, __global int*
offset)
{
size_t id = get_global_id(0);
size_t ind = offset[0] + id ;
out[ind] = pattern;
})";
error = create_single_kernel_helper_create_program(
context, &program_fill, 1, &kernel_str);
test_error(error, "Failed to create program with source");
error =
clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr);
test_error(error, "Failed to build program");
kernel_fill = clCreateKernel(program_fill, "fill", &error);
test_error(error, "Failed to create copy kernel");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int SetUpKernelArgs() override
{
cl_int error = BasicMutableCommandBufferTest::SetUpKernelArgs();
test_error(error,
"BasicMutableCommandBufferTest::SetUpKernelArgs failed");
error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int),
&overwritten_pattern);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem);
test_error(error, "clSetKernelArg failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override
{
cl_int error = BasicMutableCommandBufferTest::SetUp(elements);
test_error(error, "BasicMutableCommandBufferTest::SetUp failed");
error = SetUpKernel();
test_error(error, "SetUpKernel failed");
out_of_order_queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
test_error(error, "Unable to create command queue to test with");
cl_command_buffer_properties_khr properties[3] = {
CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_MUTABLE_KHR, 0
};
out_of_order_command_buffer = clCreateCommandBufferKHR(
1, &out_of_order_queue, properties, &error);
test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
bool Skip() override
{
cl_mutable_dispatch_fields_khr mutable_capabilities;
bool mutable_support =
!clGetDeviceInfo(
device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
return !out_of_order_support
|| (simultaneous_use_requested && !simultaneous_use_support)
|| !mutable_support || BasicMutableCommandBufferTest::Skip();
}
//--------------------------------------------------------------------------
cl_int Run() override
{
cl_int error = CL_SUCCESS;
if (simultaneous_use_support)
{
// enqueue simultaneous command-buffers with out-of-order calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
}
else
{
// enqueue single command-buffer with out-of-order calls
error = RunSingle();
test_error(error, "RunSingle failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
{
cl_sync_point_khr sync_points[2];
const cl_int pattern = pattern_pri;
cl_int error =
clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem,
&pattern, sizeof(cl_int), 0, data_size(), 0,
nullptr, &sync_points[0], nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
out_mem, &overwritten_pattern,
sizeof(cl_int), 0, data_size(), 0,
nullptr, &sync_points[1], nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandNDRangeKernelKHR(
out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
&num_elements, nullptr, 2, sync_points, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error;
error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
data_size(), output_data.data(), 1,
&single_event, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
}
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr args[] = { arg_1 };
cl_mutable_dispatch_config_khr dispatch_config{
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
nullptr,
command,
1 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_mutable_base_config_khr mutable_config{
CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
&dispatch_config
};
error = clUpdateMutableCommandsKHR(out_of_order_command_buffer,
&mutable_config);
test_error(error, "clUpdateMutableCommandsKHR failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_TRUE, 0,
data_size(), output_data.data(), 1,
&single_event, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordSimultaneousCommandBuffer()
{
cl_sync_point_khr sync_points[2];
// for both simultaneous passes this call will fill entire in_mem buffer
cl_int error = clCommandFillBufferKHR(
out_of_order_command_buffer, nullptr, in_mem, &pattern_pri,
sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
&sync_points[0], nullptr);
test_error(error, "clCommandFillBufferKHR failed");
// to avoid overwriting the entire result buffer instead of filling
// only relevant part this additional kernel was introduced
error = clCommandNDRangeKernelKHR(out_of_order_command_buffer, nullptr,
nullptr, kernel_fill, 1, nullptr,
&num_elements, nullptr, 0, nullptr,
&sync_points[1], &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clCommandNDRangeKernelKHR(
out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
&num_elements, nullptr, 2, sync_points, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
{
cl_int offset;
std::vector<cl_int> output_buffer;
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
clEventWrapper wait_events[3];
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{
cl_int error = CL_SUCCESS;
if (!user_event)
{
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
pd.wait_events[0] = user_event;
// filling offset buffer must wait for previous pass completeness
error = clEnqueueFillBuffer(
out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0,
sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0),
(wait_pass_event != nullptr ? &wait_pass_event : nullptr),
&pd.wait_events[1]);
test_error(error, "clEnqueueFillBuffer failed");
// command buffer execution must wait for two wait-events
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0],
&pd.wait_events[2]);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 1,
&pd.wait_events[2], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr args[] = { arg_1 };
cl_mutable_dispatch_config_khr dispatch_config{
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
nullptr,
command,
1 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_mutable_base_config_khr mutable_config{
CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
&dispatch_config
};
error = clUpdateMutableCommandsKHR(out_of_order_command_buffer,
&mutable_config);
test_error(error, "clUpdateMutableCommandsKHR failed");
// command buffer execution must wait for two wait-events
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0],
&pd.wait_events[2]);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 1,
&pd.wait_events[2], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
{
cl_int error = RecordSimultaneousCommandBuffer();
test_error(error, "RecordSimultaneousCommandBuffer failed");
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ 0, std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueueSimultaneousPass failed");
wait_pass_event = pass.wait_events[2];
}
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(out_of_order_queue);
test_error(error, "clFinish failed");
// verify the result buffers
for (auto&& pass : simul_passes)
{
auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i);
}
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
clCommandQueueWrapper out_of_order_queue;
clCommandBufferWrapper out_of_order_command_buffer;
clEventWrapper user_event;
clEventWrapper single_event;
clEventWrapper wait_pass_event;
clKernelWrapper kernel_fill;
clProgramWrapper program_fill;
const size_t test_global_work_size = 3 * sizeof(cl_int);
cl_mutable_command_khr command = nullptr;
const cl_int overwritten_pattern = 0xACDC;
const cl_int pattern_pri = 42;
};
} // anonymous namespace
int test_mutable_dispatch_out_of_order(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
return MakeAndRunTest<OutOfOrderTest<false>>(device, context, queue,
num_elements);
}
int test_mutable_dispatch_simultaneous_out_of_order(cl_device_id device,
cl_context context,
cl_command_queue queue,
int num_elements)
{
return MakeAndRunTest<OutOfOrderTest<true>>(device, context, queue,
num_elements);
}

View File

@@ -59,6 +59,13 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device,
cl_context context,
cl_command_queue queue,
int num_elements);
extern int test_mutable_dispatch_out_of_order(cl_device_id device,
cl_context context,
cl_command_queue queue,
int num_elements);
extern int test_mutable_dispatch_simultaneous_out_of_order(
cl_device_id device, cl_context context, cl_command_queue queue,
int num_elements);
extern int test_mutable_dispatch_global_size(cl_device_id device,
cl_context context,
cl_command_queue queue,