CTS updates for reworked simultaneous use (#2477)

Actions test plan from
https://github.com/KhronosGroup/OpenCL-CTS/issues/2473 to update CTS
tests to reflect changes from cl_khr_command_buffer PR
https://github.com/KhronosGroup/OpenCL-Docs/pull/1411

* Adds new test in`command_buffer_pipelined_enqueue.cpp` for multiple
enqueues without blocking in-between, but serialized execution.
* Removed test for `CL_COMMAND_BUFFER_STATE_PENDING_KHR` state query.
* Remove negative test for `clEnqueueCommandBuffer` pending state error.
* Simplify `cl_khr_command_buffer` tests that stress simultaneous-use by
testing multiple serialized enqueues of the same command-buffer, which
doesn't now require the device imultaneous-use capability
* Remove simultaneous-use command-buffer creation in base class to off,
and require tests do it themselves if they require it.
* Rewrite mutable dispatch simultaneous test to test updating both
pipelined enqueues, and updating the new definition of simultaneous-use

---------

Co-authored-by: Ewan Crawford <ewan@codeplay.com>
This commit is contained in:
Ewan Crawford
2025-10-14 18:15:20 +02:00
committed by GitHub
parent eded89cdd6
commit bfa96c77d8
19 changed files with 866 additions and 1255 deletions

View File

@@ -17,6 +17,7 @@ set(${MODULE_NAME}_SOURCES
command_buffer_test_barrier.cpp
command_buffer_test_event_info.cpp
command_buffer_finalize.cpp
command_buffer_pipelined_enqueue.cpp
negative_command_buffer_finalize.cpp
negative_command_buffer_svm_mem.cpp
negative_command_buffer_copy_image.cpp

View File

@@ -27,9 +27,6 @@ BasicCommandBufferTest::BasicCommandBufferTest(cl_device_id device,
: CommandBufferTestBase(device), context(context), queue(nullptr),
num_elements(0), simultaneous_use_support(false),
out_of_order_support(false), queue_out_of_order_support(false),
// try to use simultaneous path by default
simultaneous_use_requested(true),
// due to simultaneous cases extend buffer size
buffer_size_multiplier(1), command_buffer(this)
{
cl_int error = clRetainCommandQueue(queue);
@@ -72,9 +69,8 @@ bool BasicCommandBufferTest::Skip()
sizeof(capabilities), &capabilities, NULL);
test_error(error,
"Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
simultaneous_use_support = simultaneous_use_requested
&& (capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR)
!= 0;
simultaneous_use_support =
(capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR) != 0;
out_of_order_support =
supported_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
device_side_enqueue_support =
@@ -167,19 +163,7 @@ cl_int BasicCommandBufferTest::SetUp(int elements)
error = SetUpKernelArgs();
test_error(error, "SetUpKernelArgs failed");
if (simultaneous_use_support)
{
cl_command_buffer_properties_khr properties[3] = {
CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR,
0
};
command_buffer =
clCreateCommandBufferKHR(1, &queue, properties, &error);
}
else
{
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
}
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS;
@@ -192,11 +176,6 @@ cl_int MultiFlagCreationTest::Run()
// First try to find multiple flags that are supported by the driver and
// device.
if (simultaneous_use_support)
{
flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
if (is_extension_available(
device, CL_KHR_COMMAND_BUFFER_MULTI_DEVICE_EXTENSION_NAME))
{
@@ -207,6 +186,11 @@ cl_int MultiFlagCreationTest::Run()
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
{
flags |= CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_support)
{
flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
}
cl_command_buffer_properties_khr props[] = { CL_COMMAND_BUFFER_FLAGS_KHR,
@@ -381,11 +365,6 @@ cl_int ExplicitFlushTest::Run()
return CL_SUCCESS;
}
bool ExplicitFlushTest::Skip()
{
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
}
cl_int InterleavedEnqueueTest::Run()
{
cl_int error = clCommandNDRangeKernelKHR(
@@ -431,11 +410,6 @@ cl_int InterleavedEnqueueTest::Run()
return CL_SUCCESS;
}
bool InterleavedEnqueueTest::Skip()
{
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
}
cl_int EnqueueAndReleaseTest::Run()
{
cl_int error = clCommandNDRangeKernelKHR(

View File

@@ -78,8 +78,11 @@ protected:
bool queue_out_of_order_support;
bool device_side_enqueue_support;
// user request for simultaneous use
bool simultaneous_use_requested;
// Extends size of created 'in_mem' & 'out_mem' buffers, such that the same
// cl_mem buffer can be used across multiple enqueues of a command-buffer.
// Accessed in the kernel at an offset for each enqueue which is passed as
// a kernel parameter through the 'off_mem' buffer.
// See BasicCommandBufferTest::SetUpKernel() definition.
unsigned buffer_size_multiplier;
clCommandBufferWrapper command_buffer;
};
@@ -116,7 +119,6 @@ struct ExplicitFlushTest : public BasicCommandBufferTest
using BasicCommandBufferTest::BasicCommandBufferTest;
cl_int Run() override;
bool Skip() override;
};
// Test enqueueing a command-buffer twice separated by another enqueue operation
@@ -125,7 +127,6 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest
using BasicCommandBufferTest::BasicCommandBufferTest;
cl_int Run() override;
bool Skip() override;
};
// Test releasing a command-buffer after it has been submitted for execution,
@@ -156,9 +157,9 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
cl_version extension_version =
get_extension_version(device, "cl_khr_command_buffer");
if (extension_version != CL_MAKE_VERSION(0, 9, 7))
if (extension_version != CL_MAKE_VERSION(0, 9, 8))
{
log_info("cl_khr_command_buffer version 0.9.7 is required to run "
log_info("cl_khr_command_buffer version 0.9.8 is required to run "
"the test, skipping.\n ");
return TEST_SKIPPED_ITSELF;
}

View File

@@ -50,13 +50,14 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
virtual cl_int SetUp(int elements) override
{
BasicCommandBufferTest::SetUp(elements);
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp failed");
cl_int error = init_extension_functions();
error = init_extension_functions();
test_error(error, "Unable to initialise extension functions");
cl_command_buffer_properties_khr prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_support)
if (simultaneous_use_requested)
{
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
@@ -90,10 +91,10 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
cl_version extension_version = get_extension_version(
device, "cl_khr_command_buffer_mutable_dispatch");
if (extension_version != CL_MAKE_VERSION(0, 9, 3))
if (extension_version != CL_MAKE_VERSION(0, 9, 4))
{
log_info("cl_khr_command_buffer_mutable_dispatch version "
"0.9.3 is "
"0.9.4 is "
"required to run the test, skipping.\n ");
extension_avaliable = false;
}
@@ -128,6 +129,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
}
clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
bool simultaneous_use_requested = false;
const char* kernelString = "__kernel void empty() {}";
const size_t global_work_size = 4 * 16;

View File

@@ -35,9 +35,7 @@ struct IterativeArgUpdateDispatch : BasicMutableCommandBufferTest
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
command(nullptr)
{
simultaneous_use_requested = false;
}
{}
bool Skip() override
{

View File

@@ -33,9 +33,7 @@ struct MultipleCommandsDispatch : BasicMutableCommandBufferTest
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
command_pri(nullptr), command_sec(nullptr)
{
simultaneous_use_requested = false;
}
{}
bool Skip() override
{
@@ -47,7 +45,7 @@ struct MultipleCommandsDispatch : BasicMutableCommandBufferTest
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
// require mutable arguments capabillity
// require mutable arguments capability
return !mutable_support;
}

View File

@@ -34,9 +34,7 @@ struct OverwriteUpdateDispatch : BasicMutableCommandBufferTest
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
command(nullptr)
{
simultaneous_use_requested = false;
}
{}
bool Skip() override
{

View File

@@ -21,10 +21,12 @@
#include <CL/cl.h>
#include <CL/cl_ext.h>
////////////////////////////////////////////////////////////////////////////////
// mutable dispatch tests which handle following cases:
// - out-of-order queue use
// mutable dispatch tests which handles
// - out-of-order queue with dependencies between command-buffer enqueues
// - out-of-order queue with simultaneous use
// - in-order queue with dependencies between command-buffer enqueues
// - in-order queue with simultaneous use
// - cross queue with dependencies between command-buffer enqueues
// - cross-queue with simultaneous use
namespace {
@@ -35,11 +37,10 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
SimultaneousMutableDispatchTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
work_queue(nullptr), work_command_buffer(this), user_event(nullptr),
wait_pass_event(nullptr), command(nullptr)
work_queue(nullptr), work_command_buffer(this), new_in_mem(nullptr),
command(nullptr)
{
simultaneous_use_requested = simultaneous_request;
if (simultaneous_request) buffer_size_multiplier = 2;
}
cl_int SetUpKernel() override
@@ -48,26 +49,36 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
// create additional kernel to properly prepare output buffer for test
const char* kernel_str =
const char *kernel_str =
R"(
__kernel void fill(int pattern, __global int* out, __global int*
offset)
__kernel void mul(__global int* out, __global int* in, int mul_val)
{
size_t id = get_global_id(0);
size_t ind = offset[0] + id ;
out[ind] = pattern;
out[id] = in[id] * mul_val;
})";
error = create_single_kernel_helper_create_program(
context, &program_fill, 1, &kernel_str);
context, &program_mul, 1, &kernel_str);
test_error(error, "Failed to create program with source");
error =
clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr);
clBuildProgram(program_mul, 1, &device, nullptr, nullptr, nullptr);
test_error(error, "Failed to build program");
kernel_fill = clCreateKernel(program_fill, "fill", &error);
test_error(error, "Failed to create copy kernel");
kernel_mul = clCreateKernel(program_mul, "mul", &error);
test_error(error, "Failed to create multiply kernel");
new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
new_in_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
return CL_SUCCESS;
}
@@ -77,14 +88,13 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed");
error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int),
&overwritten_pattern);
error = clSetKernelArg(kernel_mul, 0, sizeof(out_mem), &out_mem);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem);
error = clSetKernelArg(kernel_mul, 1, sizeof(off_mem), &in_mem);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem);
error = clSetKernelArg(kernel_mul, 2, sizeof(cl_int), &pattern_pri);
test_error(error, "clSetKernelArg failed");
return CL_SUCCESS;
@@ -101,30 +111,28 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
&error);
test_error(error, "Unable to create command queue to test with");
cl_command_buffer_properties_khr prop =
CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_support)
{
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
const cl_command_buffer_properties_khr props[] = {
CL_COMMAND_BUFFER_FLAGS_KHR,
prop,
0,
};
work_command_buffer =
clCreateCommandBufferKHR(1, &work_queue, props, &error);
test_error(error, "clCreateCommandBufferKHR failed");
}
else
{
work_queue = queue;
work_command_buffer = command_buffer;
}
cl_command_buffer_properties_khr prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_requested)
{
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
const cl_command_buffer_properties_khr props[] = {
CL_COMMAND_BUFFER_FLAGS_KHR,
prop,
0,
};
work_command_buffer =
clCreateCommandBufferKHR(1, &work_queue, props, &error);
test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS;
}
@@ -145,293 +153,245 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|| !mutable_support;
}
cl_int RecordCommandBuffer()
{
cl_int error = clCommandNDRangeKernelKHR(
work_command_buffer, nullptr, nullptr, kernel_mul, 1, nullptr,
&num_elements, nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(work_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
cl_int RunSerializedPass(std::vector<cl_int> &first_enqueue_output,
std::vector<cl_int> &second_enqueue_output)
{
/* Serialize command-buffer enqueue, is a linear sequence of
* commands, with dependencies enforced using an in-order queue
* or cl_event dependencies.
*
* 1. Fill input buffer
* 2. Enqueue command-buffer doing: `output = a * input;
* 3. Read output buffer to host data so it can be verified later
* - Update command to new input buffer, new `a` val and use output
* buffer from previous invocation as new input buffer.
* 4. Enqueue command-buffer again.
* 5. Read new output buffer back to host data so it can be verified
* later
*
*/
clEventWrapper E[4];
cl_int error = clEnqueueFillBuffer(
work_queue, in_mem, &pattern_fill, sizeof(cl_int), 0, data_size(),
0, nullptr, (out_of_order_request ? &E[0] : nullptr));
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[0] : nullptr),
(out_of_order_request ? &E[1] : nullptr));
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(work_queue, out_mem, CL_FALSE, 0,
data_size(), first_enqueue_output.data(),
(out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[1] : nullptr),
(out_of_order_request ? &E[2] : nullptr));
test_error(error, "clEnqueueReadBuffer failed");
cl_mutable_dispatch_arg_khr arg_1{ 0, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr arg_2{ 1, sizeof(cl_mem), &out_mem };
cl_mutable_dispatch_arg_khr arg_3{ 2, sizeof(cl_int), &pattern_sec };
cl_mutable_dispatch_arg_khr args[] = { arg_1, arg_2, arg_3 };
cl_mutable_dispatch_config_khr dispatch_config{
command,
3 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void* configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[2] : nullptr),
(out_of_order_request ? &E[3] : nullptr));
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
work_queue, new_out_mem, CL_FALSE, 0, data_size(),
second_enqueue_output.data(), (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[3] : nullptr), nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
cl_int RunSimultaneousPass(std::vector<cl_int> &first_enqueue_output,
std::vector<cl_int> &second_enqueue_output)
{
/* Simultaneous command-buffer pass enqueues a command-buffer twice
* without dependencies between the enqueues, but an update so that
* all the parameters are different to avoid race conditions in the
* kernel execution. The asynchronous task graph looks like:
*
* (Fill input A buffer) (Fill input B buffer)
* | |
* (Enqueue command_buffer) (Enqueue updated command_buffer)
* | |
* (Read output A buffer) (Read output B buffer)
*/
clEventWrapper E[4];
cl_int error = clEnqueueFillBuffer(
work_queue, in_mem, &pattern_fill, sizeof(cl_int), 0, data_size(),
0, nullptr, (out_of_order_request ? &E[0] : nullptr));
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueFillBuffer(work_queue, new_in_mem, &pattern_fill_2,
sizeof(cl_int), 0, data_size(), 0, nullptr,
(out_of_order_request ? &E[1] : nullptr));
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[0] : nullptr),
(out_of_order_request ? &E[2] : nullptr));
test_error(error, "clEnqueueCommandBufferKHR failed");
cl_mutable_dispatch_arg_khr arg_1{ 0, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr arg_2{ 1, sizeof(cl_mem), &new_in_mem };
cl_mutable_dispatch_arg_khr arg_3{ 2, sizeof(cl_int), &pattern_sec };
cl_mutable_dispatch_arg_khr args[] = { arg_1, arg_2, arg_3 };
cl_mutable_dispatch_config_khr dispatch_config{
command,
3 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void* configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[1] : nullptr),
(out_of_order_request ? &E[3] : nullptr));
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
work_queue, out_mem, CL_FALSE, 0, data_size(),
first_enqueue_output.data(), (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[2] : nullptr), nullptr);
test_error(error, "clEnqueueReadBuffer failed");
error = clEnqueueReadBuffer(
work_queue, new_out_mem, CL_FALSE, 0, data_size(),
second_enqueue_output.data(), (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[3] : nullptr), nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
cl_int VerifySerializedPass(std::vector<cl_int> &first_enqueue_output,
std::vector<cl_int> &second_enqueue_output)
{
const cl_int first_enqueue_ref = pattern_pri * pattern_fill;
const cl_int second_enqueue_ref = pattern_sec * first_enqueue_ref;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(first_enqueue_ref, first_enqueue_output[i],
i);
CHECK_VERIFICATION_ERROR(second_enqueue_ref,
second_enqueue_output[i], i);
}
return CL_SUCCESS;
}
cl_int VerifySimultaneousPass(std::vector<cl_int> &first_enqueue_output,
std::vector<cl_int> &second_enqueue_output)
{
const cl_int first_enqueue_ref = pattern_pri * pattern_fill;
const cl_int second_enqueue_ref = pattern_sec * pattern_fill_2;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(first_enqueue_ref, first_enqueue_output[i],
i);
CHECK_VERIFICATION_ERROR(second_enqueue_ref,
second_enqueue_output[i], i);
}
return CL_SUCCESS;
}
cl_int Run() override
{
cl_int error = CL_SUCCESS;
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
if (simultaneous_use_support)
std::vector<cl_int> first_enqueue_output(num_elements);
std::vector<cl_int> second_enqueue_output(num_elements);
if (simultaneous_use_requested)
{
// enqueue simultaneous command-buffers with out-of-order calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
error = RunSimultaneousPass(first_enqueue_output,
second_enqueue_output);
test_error(error, "RunSimultaneousPass failed");
}
else
{
// enqueue single command-buffer with out-of-order calls
error = RunSingle();
test_error(error, "RunSingle failed");
error =
RunSerializedPass(first_enqueue_output, second_enqueue_output);
test_error(error, "RunSerializedPass failed");
}
return CL_SUCCESS;
}
cl_int RecordCommandBuffer()
{
cl_sync_point_khr sync_points[2];
const cl_int pattern = pattern_pri;
cl_int error = clCommandFillBufferKHR(
work_command_buffer, nullptr, nullptr, in_mem, &pattern,
sizeof(cl_int), 0, data_size(), 0, nullptr, &sync_points[0],
nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandFillBufferKHR(work_command_buffer, nullptr, nullptr,
out_mem, &overwritten_pattern,
sizeof(cl_int), 0, data_size(), 0,
nullptr, &sync_points[1], nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandNDRangeKernelKHR(
work_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
&num_elements, nullptr, 2, sync_points, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(work_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
cl_int RunSingle()
{
cl_int error;
error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
error = clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 0,
nullptr, &single_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
std::vector<cl_int> output_data(num_elements);
error =
clEnqueueReadBuffer(work_queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 1, &single_event, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
}
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr args[] = { arg_1 };
cl_mutable_dispatch_config_khr dispatch_config{
command,
1 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void* configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
error = clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 0,
nullptr, &single_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_TRUE, 0,
data_size(), output_data.data(), 1,
&single_event, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
}
return CL_SUCCESS;
}
cl_int RecordSimultaneousCommandBuffer()
{
cl_sync_point_khr sync_points[2];
// for both simultaneous passes this call will fill entire in_mem buffer
cl_int error = clCommandFillBufferKHR(
work_command_buffer, nullptr, nullptr, in_mem, &pattern_pri,
sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
&sync_points[0], nullptr);
test_error(error, "clCommandFillBufferKHR failed");
// to avoid overwriting the entire result buffer instead of filling
// only relevant part this additional kernel was introduced
error = clCommandNDRangeKernelKHR(
work_command_buffer, nullptr, nullptr, kernel_fill, 1, nullptr,
&num_elements, nullptr, 0, nullptr, &sync_points[1], &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clCommandNDRangeKernelKHR(
work_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
&num_elements, nullptr, 2, sync_points, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(work_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
struct SimulPassData
{
cl_int offset;
std::vector<cl_int> output_buffer;
std::vector<cl_int> updated_output_buffer;
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
clEventWrapper wait_events[3];
};
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{
cl_int error = CL_SUCCESS;
if (!user_event)
{
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
pd.wait_events[0] = user_event;
// filling offset buffer must wait for previous pass completeness
error = clEnqueueFillBuffer(
work_queue, off_mem, &pd.offset, sizeof(cl_int), 0, sizeof(cl_int),
(wait_pass_event != nullptr ? 1 : 0),
(wait_pass_event != nullptr ? &wait_pass_event : nullptr),
&pd.wait_events[1]);
test_error(error, "clEnqueueFillBuffer failed");
// command buffer execution must wait for two wait-events
error =
clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 2,
&pd.wait_events[0], &pd.wait_events[2]);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(work_queue, out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 1,
&pd.wait_events[2], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
// Retain new output memory object until the end of the test.
retained_output_buffers.push_back(new_out_mem);
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr args[] = { arg_1 };
cl_mutable_dispatch_config_khr dispatch_config{
command,
1 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void* configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
// command buffer execution must wait for two wait-events
error =
clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 2,
&pd.wait_events[0], &pd.wait_events[2]);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(),
pd.updated_output_buffer.data(), 1,
&pd.wait_events[2], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
cl_int RunSimultaneous()
{
cl_int error = RecordSimultaneousCommandBuffer();
test_error(error, "RecordSimultaneousCommandBuffer failed");
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ 0, std::vector<cl_int>(num_elements),
std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements),
std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueueSimultaneousPass failed");
wait_pass_event = pass.wait_events[2];
}
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(work_queue);
test_error(error, "clFinish failed");
// verify the result buffers
auto& first_pass_output = simul_passes[0].output_buffer;
auto& first_pass_updated_output = simul_passes[0].updated_output_buffer;
auto& second_pass_output = simul_passes[1].output_buffer;
auto& second_pass_updated_output =
simul_passes[1].updated_output_buffer;
for (size_t i = 0; i < num_elements; i++)
if (simultaneous_use_requested)
{
// First pass:
// Before updating, out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_output[i], i);
// After updating, new_out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_updated_output[i],
i);
// Second pass:
// Before updating, out_mem is filled with overwritten_pattern
CHECK_VERIFICATION_ERROR(overwritten_pattern, second_pass_output[i],
i);
// After updating, new_out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, second_pass_updated_output[i],
i);
error = VerifySimultaneousPass(first_enqueue_output,
second_enqueue_output);
test_error(error, "VerifySimultaneousPass failed");
}
else
{
error = VerifySerializedPass(first_enqueue_output,
second_enqueue_output);
test_error(error, "VerifySerializedPass failed");
}
return CL_SUCCESS;
@@ -440,22 +400,20 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
clCommandQueueWrapper work_queue;
clCommandBufferWrapper work_command_buffer;
clEventWrapper user_event;
clEventWrapper single_event;
clEventWrapper wait_pass_event;
clKernelWrapper kernel_mul;
clProgramWrapper program_mul;
clKernelWrapper kernel_fill;
clProgramWrapper program_fill;
clMemWrapper new_out_mem, new_in_mem;
std::vector<clMemWrapper> retained_output_buffers;
const size_t test_global_work_size = 3 * sizeof(cl_int);
const cl_int pattern_pri = 42;
const cl_int pattern_sec = 0xACDC;
const cl_int pattern_fill = 0xA;
const cl_int pattern_fill_2 = -3;
const cl_int overwritten_pattern = 0xACDC;
cl_mutable_command_khr command;
};
template <bool simultaneous_use_request>
struct CrossQueueSimultaneousMutableDispatchTest
: public BasicMutableCommandBufferTest
{
@@ -463,9 +421,9 @@ struct CrossQueueSimultaneousMutableDispatchTest
cl_context context,
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
queue_sec(nullptr), command(nullptr)
queue_sec(nullptr), new_out_mem(nullptr), command(nullptr)
{
simultaneous_use_requested = true;
simultaneous_use_requested = simultaneous_use_request;
}
cl_int SetUpKernel() override
@@ -488,6 +446,11 @@ struct CrossQueueSimultaneousMutableDispatchTest
kernel = clCreateKernel(program, "fill", &error);
test_error(error, "Failed to create copy kernel");
new_out_mem =
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements, nullptr, &error);
test_error(error, "clCreateBuffer failed");
return CL_SUCCESS;
}
@@ -530,24 +493,18 @@ struct CrossQueueSimultaneousMutableDispatchTest
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
return !simultaneous_use_support || !mutable_support;
return (simultaneous_use_requested && !simultaneous_use_support)
|| !mutable_support;
}
cl_int Run() override
{
// record command buffer
cl_int pattern = 0;
cl_int error = clCommandFillBufferKHR(
command_buffer, nullptr, nullptr, out_mem, &pattern, sizeof(cl_int),
0, data_size(), 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandFillBufferKHR failed");
cl_command_properties_khr props[] = {
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
};
error = clCommandNDRangeKernelKHR(
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -555,16 +512,15 @@ struct CrossQueueSimultaneousMutableDispatchTest
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
// enqueue command buffer to default queue
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
// If we are testing not using simultaneous-use then we need to use
// an event to serialize the execution order to the command-buffer
// submission to each queue.
clEventWrapper E;
error = clEnqueueCommandBufferKHR(
0, nullptr, command_buffer, 0, nullptr,
(simultaneous_use_requested ? nullptr : &E));
test_error(error, "clEnqueueCommandBufferKHR failed");
// update mutable parameters
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
data_size(), nullptr, &error);
test_error(error, "clCreateBuffer failed");
cl_mutable_dispatch_arg_khr arg_0{ 0, sizeof(cl_int), &pattern_sec };
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem };
@@ -594,30 +550,35 @@ struct CrossQueueSimultaneousMutableDispatchTest
test_error(error, "clUpdateMutableCommandsKHR failed");
// enqueue command buffer to non-default queue
error = clEnqueueCommandBufferKHR(1, &queue_sec, command_buffer, 0,
nullptr, nullptr);
error = clEnqueueCommandBufferKHR(
1, &queue_sec, command_buffer, (simultaneous_use_requested ? 0 : 1),
(simultaneous_use_requested ? nullptr : &E), nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clFinish(queue_sec);
test_error(error, "clFinish failed");
// read result of command buffer execution
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
std::vector<cl_int> sec_output_data(num_elements);
error =
clEnqueueReadBuffer(queue_sec, new_out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
sec_output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
// verify the result
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_sec, output_data[i], i);
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
CHECK_VERIFICATION_ERROR(pattern_sec, sec_output_data[i], i);
}
return CL_SUCCESS;
}
clCommandQueueWrapper queue_sec;
clMemWrapper new_out_mem;
const cl_int pattern_pri = 42;
const cl_int pattern_sec = 0xACDC;
cl_mutable_command_khr command;
@@ -637,14 +598,26 @@ REGISTER_TEST(mutable_dispatch_simultaneous_out_of_order)
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_in_order)
{
return MakeAndRunTest<SimultaneousMutableDispatchTest<false, false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_simultaneous_in_order)
{
return MakeAndRunTest<SimultaneousMutableDispatchTest<true, false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_simultaneous_cross_queue)
REGISTER_TEST(mutable_dispatch_cross_queue)
{
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest>(
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest<false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_simultaneous_cross_queue)
{
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest<true>>(
device, context, queue, num_elements);
}

View File

@@ -79,11 +79,7 @@ struct CommandBufferEventSync : public BasicCommandBufferTest
: BasicCommandBufferTest(device, context, queue),
command_buffer_sec(this), kernel_sec(nullptr), in_mem_sec(nullptr),
out_mem_sec(nullptr), off_mem_sec(nullptr), test_event(nullptr)
{
simultaneous_use_requested =
(event_mode == EventMode::RET_COMBUF_WAIT_FOR_COMBUF) ? true
: false;
}
{}
//--------------------------------------------------------------------------
cl_int SetUpKernel() override
@@ -159,9 +155,6 @@ struct CommandBufferEventSync : public BasicCommandBufferTest
{
if (BasicCommandBufferTest::Skip()) return true;
if (simultaneous_use_requested && !simultaneous_use_support)
return true;
if (out_of_order_requested && !out_of_order_support) return true;
return false;

View File

@@ -48,6 +48,39 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
: BasicCommandBufferTest(device, context, queue)
{}
bool Skip() override
{
if (BasicCommandBufferTest::Skip()) return true;
if (test_mode == CombufInfoTestMode::CITM_PROP_ARRAY)
{
return !simultaneous_use_support
|| !(is_extension_available(
device,
CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME));
}
return false;
}
cl_int SetUp(int elements) override
{
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp() failed");
if (test_mode == CombufInfoTestMode::CITM_PROP_ARRAY)
{
cl_command_buffer_properties_khr properties[3] = {
CL_COMMAND_BUFFER_FLAGS_KHR,
CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR, 0
};
command_buffer =
clCreateCommandBufferKHR(1, &queue, properties, &error);
test_error(error, "clCreateCommandBufferKHR failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int Run() override
{
@@ -237,33 +270,6 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
test_error(error, "verify_state failed");
error = clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
clEventWrapper trigger_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
clEventWrapper execute_event;
// enqueued command buffer blocked on user event
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&trigger_event, &execute_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
// execute command buffer
cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
test_error(error, "verify_state failed");
test_error(signal_error, "clSetUserEventStatus failed");
error = clWaitForEvents(1, &execute_event);
test_error(error, "Unable to wait for execute event");
// verify executable state
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
test_error(error, "verify_state failed");
return CL_SUCCESS;
}

View File

@@ -21,11 +21,9 @@
namespace {
////////////////////////////////////////////////////////////////////////////////
// out-of-order tests for cl_khr_command_buffer which handles below cases:
// -test case for out-of-order command-buffer
// -test an out-of-order command-buffer with simultaneous use
// Tests for cl_khr_command_buffer which handles submitting a command-buffer to
// an out-of-order queue.
template <bool simultaneous_request>
struct OutOfOrderTest : public BasicCommandBufferTest
{
OutOfOrderTest(cl_device_id device, cl_context context,
@@ -35,18 +33,11 @@ struct OutOfOrderTest : public BasicCommandBufferTest
user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr),
program_fill(nullptr)
{
simultaneous_use_requested = simultaneous_request;
if (simultaneous_request) buffer_size_multiplier = 2;
buffer_size_multiplier = 2; // two enqueues of command-buffer
}
//--------------------------------------------------------------------------
cl_int SetUpKernel() override
{
// if device doesn't support simultaneous use which was requested
// we can skip creation of OCL resources
if (simultaneous_use_requested && !simultaneous_use_support)
return CL_SUCCESS;
cl_int error = BasicCommandBufferTest::SetUpKernel();
test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
@@ -74,14 +65,8 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int SetUpKernelArgs() override
{
// if device doesn't support simultaneous use which was requested
// we can skip creation of OCL resources
if (simultaneous_use_requested && !simultaneous_use_support)
return CL_SUCCESS;
cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed");
@@ -98,7 +83,6 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override
{
cl_int error = BasicCommandBufferTest::SetUp(elements);
@@ -108,110 +92,23 @@ struct OutOfOrderTest : public BasicCommandBufferTest
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
test_error(error, "Unable to create command queue to test with");
cl_command_buffer_properties_khr properties[3] = {
CL_COMMAND_BUFFER_FLAGS_KHR, 0, 0
};
if (simultaneous_use_requested && simultaneous_use_support)
properties[1] = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
out_of_order_command_buffer = clCreateCommandBufferKHR(
1, &out_of_order_queue, properties, &error);
out_of_order_command_buffer =
clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
bool Skip() override
{
if (BasicCommandBufferTest::Skip()) return true;
if (!out_of_order_support
|| (simultaneous_use_requested && !simultaneous_use_support))
return true;
return false;
return !out_of_order_support;
}
//--------------------------------------------------------------------------
cl_int Run() override
{
cl_int error = CL_SUCCESS;
if (simultaneous_use_support)
{
// enqueue simultaneous command-buffers with out-of-order calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
}
else
{
// enqueue single command-buffer with out-of-order calls
error = RunSingle();
test_error(error, "RunSingle failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
cl_int RecordCommandBuffer() const
{
cl_sync_point_khr sync_points[2];
const cl_int pattern = pattern_pri;
cl_int error = clCommandFillBufferKHR(
out_of_order_command_buffer, nullptr, nullptr, in_mem, &pattern,
sizeof(cl_int), 0, data_size(), 0, nullptr, &sync_points[0],
nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
nullptr, out_mem, &overwritten_pattern,
sizeof(cl_int), 0, data_size(), 0,
nullptr, &sync_points[1], nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandNDRangeKernelKHR(
out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
&num_elements, nullptr, 2, sync_points, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 0, nullptr, &user_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
data_size(), output_data.data(), 1,
&user_event, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordSimultaneousCommandBuffer() const
{
cl_sync_point_khr sync_points[2];
// for both simultaneous passes this call will fill entire in_mem buffer
// fill entire in_mem buffer
cl_int error = clCommandFillBufferKHR(
out_of_order_command_buffer, nullptr, nullptr, in_mem, &pattern_pri,
sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
@@ -236,79 +133,63 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
struct EnqueuePassData
{
cl_int offset;
std::vector<cl_int> output_buffer;
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
clEventWrapper wait_events[3];
// 0: offset-buffer fill event, 2:kernel done event
clEventWrapper wait_events[2];
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
cl_int EnqueuePass(EnqueuePassData& pd)
{
cl_int error = CL_SUCCESS;
if (!user_event)
{
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
pd.wait_events[0] = user_event;
// filling offset buffer must wait for previous pass completeness
error = clEnqueueFillBuffer(
cl_int error = clEnqueueFillBuffer(
out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0,
sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0),
(wait_pass_event != nullptr ? &wait_pass_event : nullptr),
&pd.wait_events[1]);
&pd.wait_events[0]);
test_error(error, "clEnqueueFillBuffer failed");
// command buffer execution must wait for two wait-events
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0],
&pd.wait_events[2]);
0, nullptr, out_of_order_command_buffer, 1, &pd.wait_events[0],
&pd.wait_events[1]);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 1,
&pd.wait_events[2], nullptr);
&pd.wait_events[1], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
cl_int Run() override
{
cl_int error = RecordSimultaneousCommandBuffer();
test_error(error, "RecordSimultaneousCommandBuffer failed");
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
std::vector<EnqueuePassData> enqueue_passes = {
{ 0, std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
for (auto&& pass : enqueue_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueueSimultaneousPass failed");
error = EnqueuePass(pass);
test_error(error, "EnqueuePass failed");
wait_pass_event = pass.wait_events[2];
wait_pass_event = pass.wait_events[1];
}
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(out_of_order_queue);
test_error(error, "clFinish failed");
// verify the result buffers
for (auto&& pass : simul_passes)
for (auto&& pass : enqueue_passes)
{
auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++)
@@ -320,7 +201,6 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
clCommandQueueWrapper out_of_order_queue;
clCommandBufferWrapper out_of_order_command_buffer;
@@ -338,12 +218,5 @@ struct OutOfOrderTest : public BasicCommandBufferTest
REGISTER_TEST(out_of_order)
{
return MakeAndRunTest<OutOfOrderTest<false>>(device, context, queue,
num_elements);
}
REGISTER_TEST(simultaneous_out_of_order)
{
return MakeAndRunTest<OutOfOrderTest<true>>(device, context, queue,
num_elements);
return MakeAndRunTest<OutOfOrderTest>(device, context, queue, num_elements);
}

View File

@@ -0,0 +1,321 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "basic_command_buffer.h"
#include <vector>
namespace {
////////////////////////////////////////////////////////////////////////////////
// Tests for multiple sequential submissions of a command-buffer without a
// blocking wait between them, but using the following mechanisms to serialize
// execution of the submissions.
// * In-order queue dependencies
// * Event dependencies in command-buffer submissions to an out-of-order queue
// * Barrier submissions between command-buffer submissions to an out-of-order
// queue
// Base class that individual test fixtures are derived from
struct CommandBufferPipelined : public BasicCommandBufferTest
{
CommandBufferPipelined(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue)
{}
cl_int SetUpKernel() override
{
const char* mul_kernel_str =
R"(
__kernel void mul_by_val(int in, __global int* data)
{
size_t id = get_global_id(0);
data[id] *= in;
}
__kernel void increment(__global int* data)
{
size_t id = get_global_id(0);
data[id]++;
})";
cl_int error = create_single_kernel_helper_create_program(
context, &program, 1, &mul_kernel_str);
test_error(error, "Failed to create program with source");
error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
test_error(error, "Failed to build program");
mul_kernel = clCreateKernel(program, "mul_by_val", &error);
test_error(error, "Failed to create mul_by_val kernel");
inc_kernel = clCreateKernel(program, "increment", &error);
test_error(error, "Failed to create increment kernel");
return CL_SUCCESS;
}
cl_int SetUpKernelArgs() override
{
cl_int error = CL_SUCCESS;
out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE,
num_elements * buffer_size_multiplier
* sizeof(cl_int),
nullptr, &error);
test_error(error, "clCreateBuffer failed");
cl_int val_arg = pattern;
error = clSetKernelArg(mul_kernel, 0, sizeof(cl_int), &val_arg);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(mul_kernel, 1, sizeof(out_mem), &out_mem);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(inc_kernel, 0, sizeof(out_mem), &out_mem);
test_error(error, "clSetKernelArg failed");
return CL_SUCCESS;
}
cl_int RecordCommandBuffer(clCommandBufferWrapper& cmd_buf)
{
cl_int error = clCommandNDRangeKernelKHR(
cmd_buf, nullptr, nullptr, inc_kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(cmd_buf);
test_error(error, "clFinalizeCommandBufferKHR failed");
// Zero initialize buffer before starting test
cl_int zero_pattern = 0;
error =
clEnqueueFillBuffer(queue, out_mem, &zero_pattern, sizeof(cl_int),
0, data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
return CL_SUCCESS;
}
const cl_int pattern = 42;
clKernelWrapper inc_kernel = nullptr;
clKernelWrapper mul_kernel = nullptr;
};
struct InOrderPipelined : public CommandBufferPipelined
{
InOrderPipelined(cl_device_id device, cl_context context,
cl_command_queue queue)
: CommandBufferPipelined(device, context, queue)
{}
cl_int Run() override
{
cl_int error = RecordCommandBuffer(command_buffer);
test_error(error, "RecordCommandBuffer failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error =
clEnqueueNDRangeKernel(queue, mul_kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
// Verify
const cl_int ref = pattern + 1;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
}
return CL_SUCCESS;
}
};
struct EventPipelined : public CommandBufferPipelined
{
EventPipelined(cl_device_id device, cl_context context,
cl_command_queue queue)
: CommandBufferPipelined(device, context, queue),
out_of_order_queue(nullptr), out_of_order_command_buffer(this)
{}
bool Skip() override
{
return CommandBufferPipelined::Skip() || !out_of_order_support;
}
cl_int SetUp(int elements) override
{
cl_int error = CommandBufferPipelined::SetUp(elements);
test_error(error, "EventPipelined::SetUp failed");
out_of_order_queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
test_error(error, "Unable to create command queue to test with");
out_of_order_command_buffer =
clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS;
}
cl_int Run() override
{
cl_int error = RecordCommandBuffer(out_of_order_command_buffer);
test_error(error, "RecordCommandBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 0, nullptr, &events[0]);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueNDRangeKernel(out_of_order_queue, mul_kernel, 1,
nullptr, &num_elements, nullptr, 1,
&events[0], &events[1]);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 1, &events[1], &events[2]);
test_error(error, "clEnqueueCommandBufferKHR failed");
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
data_size(), output_data.data(), 1,
&events[2], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
// Verify
const cl_int ref = pattern + 1;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
}
return CL_SUCCESS;
}
clCommandQueueWrapper out_of_order_queue;
clCommandBufferWrapper out_of_order_command_buffer;
clEventWrapper events[3] = { nullptr, nullptr, nullptr };
};
struct BarrierPipelined : public CommandBufferPipelined
{
BarrierPipelined(cl_device_id device, cl_context context,
cl_command_queue queue)
: CommandBufferPipelined(device, context, queue),
out_of_order_queue(nullptr), out_of_order_command_buffer(this)
{}
bool Skip() override
{
return CommandBufferPipelined::Skip() || !out_of_order_support;
}
cl_int SetUp(int elements) override
{
cl_int error = CommandBufferPipelined::SetUp(elements);
test_error(error, "EventPipelined::SetUp failed");
out_of_order_queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
test_error(error, "Unable to create command queue to test with");
out_of_order_command_buffer =
clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS;
}
cl_int Run() override
{
cl_int error = RecordCommandBuffer(out_of_order_command_buffer);
test_error(error, "RecordCommandBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 0, nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueBarrier(out_of_order_queue);
test_error(error, "clEnqueueBarrier failed");
error =
clEnqueueNDRangeKernel(out_of_order_queue, mul_kernel, 1, nullptr,
&num_elements, nullptr, 0, nullptr, nullptr);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clEnqueueBarrier(out_of_order_queue);
test_error(error, "clEnqueueBarrier failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 0, nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueBarrier(out_of_order_queue);
test_error(error, "clEnqueueBarrier failed");
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
data_size(), output_data.data(), 0, nullptr,
nullptr);
test_error(error, "clEnqueueReadBuffer failed");
// Verify
const cl_int ref = pattern + 1;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
}
return CL_SUCCESS;
}
clCommandQueueWrapper out_of_order_queue;
clCommandBufferWrapper out_of_order_command_buffer;
};
} // anonymous namespace
REGISTER_TEST(pipeline_in_order_deps)
{
return MakeAndRunTest<InOrderPipelined>(device, context, queue,
num_elements);
}
REGISTER_TEST(pipeline_event_deps)
{
return MakeAndRunTest<EventPipelined>(device, context, queue, num_elements);
}
REGISTER_TEST(pipeline_barrier_deps)
{
return MakeAndRunTest<BarrierPipelined>(device, context, queue,
num_elements);
}

View File

@@ -44,27 +44,18 @@
namespace {
////////////////////////////////////////////////////////////////////////////////
// printf tests for cl_khr_command_buffer which handles below cases:
// -test cases for device side printf
// -test cases for device side printf with a simultaneous use command-buffer
// Test for cl_khr_command_buffer which handles a command-buffer containing a
// printf kernel being repeatedly enqueued.
template <bool simul_use>
struct CommandBufferPrintfTest : public BasicCommandBufferTest
{
CommandBufferPrintfTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue),
trigger_event(nullptr), wait_event(nullptr), file_descriptor(0),
printf_use_support(false)
: BasicCommandBufferTest(device, context, queue), file_descriptor(0)
{
simultaneous_use_requested = simul_use;
if (simul_use)
{
buffer_size_multiplier = num_test_iters;
}
buffer_size_multiplier = num_test_iters;
}
//--------------------------------------------------------------------------
void ReleaseOutputStream(int fd)
{
fflush(stdout);
@@ -72,7 +63,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
close(fd);
}
//--------------------------------------------------------------------------
int AcquireOutputStream(int* error)
{
int fd = streamDup(fileno(stdout));
@@ -85,7 +75,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return fd;
}
//--------------------------------------------------------------------------
void GetAnalysisBuffer(std::stringstream& buffer)
{
std::ifstream fp(temp_filename, std::ios::in);
@@ -95,7 +84,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
}
}
//--------------------------------------------------------------------------
void PurgeTempFile()
{
std::ofstream ofs(temp_filename,
@@ -103,9 +91,10 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
ofs.close();
}
//--------------------------------------------------------------------------
bool Skip() override
{
if (BasicCommandBufferTest::Skip()) return true;
// Query if device supports kernel printf use
cl_device_command_buffer_capabilities_khr capabilities;
cl_int error =
@@ -114,16 +103,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
test_error(error,
"Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
printf_use_support =
const bool printf_use_support =
(capabilities & CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR)
!= 0;
if (!printf_use_support) return true;
return BasicCommandBufferTest::Skip()
|| (simultaneous_use_requested && !simultaneous_use_support);
return !printf_use_support;
}
//--------------------------------------------------------------------------
cl_int SetUpKernel() override
{
cl_int error = CL_SUCCESS;
@@ -153,14 +139,12 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
size_t data_size() const override
{
return sizeof(cl_char) * num_elements * buffer_size_multiplier
* max_pattern_length;
}
//--------------------------------------------------------------------------
cl_int SetUpKernelArgs() override
{
cl_int error = CL_SUCCESS;
@@ -192,7 +176,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override
{
auto pcFname = get_temp_filename();
@@ -209,39 +192,10 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return BasicCommandBufferTest::SetUp(elements);
}
//--------------------------------------------------------------------------
cl_int Run() override
{
cl_int error = CL_SUCCESS;
// record command buffer with primary queue
error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
if (simultaneous_use_support)
{
// enqueue simultaneous command-buffers with printf calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
}
else
{
// enqueue single command-buffer with printf calls
error = RunSingle();
test_error(error, "RunSingle failed");
}
std::remove(temp_filename.c_str());
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
{
cl_int error = CL_SUCCESS;
error = clCommandNDRangeKernelKHR(
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -251,7 +205,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
#define test_error_release_stdout(errCode, msg) \
{ \
auto errCodeResult = errCode; \
@@ -263,96 +216,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
} \
}
//--------------------------------------------------------------------------
cl_int EnqueueSinglePass(const std::vector<cl_char>& pattern,
std::vector<cl_char>& output_data)
{
cl_int error = CL_SUCCESS;
auto in_mem_size = sizeof(cl_char) * pattern.size();
error = clEnqueueWriteBuffer(queue, in_mem, CL_TRUE, 0, in_mem_size,
&pattern[0], 0, nullptr, nullptr);
test_error(error, "clEnqueueWriteBuffer failed");
test_assert_error(pattern.size() - 1 <= CL_UINT_MAX,
"pattern.size() - 1 does not fit in a cl_uint");
cl_uint offset[] = { 0, static_cast<cl_uint>(pattern.size() - 1) };
error = clEnqueueWriteBuffer(queue, off_mem, CL_TRUE, 0, sizeof(offset),
offset, 0, nullptr, nullptr);
test_error(error, "clEnqueueWriteBuffer failed");
// redirect output stream to temporary file
file_descriptor = AcquireOutputStream(&error);
if (error != 0)
{
log_error("Error while redirection stdout to file");
return TEST_FAIL;
}
// enqueue command buffer with kernel containing printf command
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, &wait_event);
test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed");
fflush(stdout);
// Wait until kernel finishes its execution and (thus) the output
// printed from the kernel is immediately printed
error = clWaitForEvents(1, &wait_event);
test_error(error, "clWaitForEvents failed");
// output buffer contains pattern to be compared with printout
error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error_release_stdout(error, "clEnqueueReadBuffer failed");
error = clFinish(queue);
test_error_release_stdout(error, "clFinish failed");
ReleaseOutputStream(file_descriptor);
// copy content of temporary file into string stream
std::stringstream sstr;
GetAnalysisBuffer(sstr);
if (sstr.str().size() != num_elements * offset[1])
{
log_error("GetAnalysisBuffer failed\n");
return TEST_FAIL;
}
// verify the result - compare printout and output buffer
for (size_t i = 0; i < num_elements * offset[1]; i++)
{
CHECK_VERIFICATION_ERROR(sstr.str().at(i), output_data[i], i);
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error = CL_SUCCESS;
std::vector<cl_char> output_data(num_elements * max_pattern_length);
for (unsigned i = 0; i < num_test_iters; i++)
{
unsigned pattern_length =
std::max(min_pattern_length, rand() % max_pattern_length);
char pattern_character = 'a' + rand() % 26;
std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
pattern[pattern_length] = '\0';
error = EnqueueSinglePass(pattern, output_data);
test_error(error, "EnqueueSinglePass failed");
output_data.assign(output_data.size(), 0);
PurgeTempFile();
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
struct EnqueuePassData
{
// null terminated character buffer
std::vector<cl_char> pattern;
@@ -361,8 +225,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
std::vector<cl_char> output_buffer;
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
cl_int EnqueuePass(EnqueuePassData& pd)
{
// write current pattern to device memory
auto in_mem_size = sizeof(cl_char) * pd.pattern.size();
@@ -377,15 +240,8 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
pd.offset, 0, nullptr, nullptr);
test_error_release_stdout(error, "clEnqueueWriteBuffer failed");
// create user event to block simultaneous command buffers
if (!trigger_event)
{
trigger_event = clCreateUserEvent(context, &error);
test_error_release_stdout(error, "clCreateUserEvent failed");
}
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&trigger_event, nullptr);
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed");
// output buffer contains pattern to be compared with printout
@@ -398,14 +254,14 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
cl_int Run() override
{
cl_int error = CL_SUCCESS;
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
cl_int offset = static_cast<cl_int>(num_elements * max_pattern_length);
std::vector<SimulPassData> simul_passes(num_test_iters);
std::vector<EnqueuePassData> enqueue_passes(num_test_iters);
const int pattern_chars_range = 26;
std::list<cl_char> pattern_chars;
@@ -413,7 +269,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
pattern_chars.push_back(cl_char('a' + i));
test_assert_error(pattern_chars.size() >= num_test_iters,
"Number of simultaneous launches must be lower than "
"Number of launches must be lower than "
"size of characters container");
cl_int total_pattern_coverage = 0;
@@ -428,11 +284,12 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
pattern.back() = '\0';
simul_passes[i] = { pattern,
{ cl_int(i * offset), cl_int(pattern_length) },
std::vector<cl_char>(num_elements
* pattern_length) };
total_pattern_coverage += simul_passes[i].output_buffer.size();
enqueue_passes[i] = {
pattern,
{ cl_int(i * offset), cl_int(pattern_length) },
std::vector<cl_char>(num_elements * pattern_length)
};
total_pattern_coverage += enqueue_passes[i].output_buffer.size();
pattern_chars.erase(it);
};
@@ -444,17 +301,14 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return TEST_FAIL;
}
// enqueue read/write and command buffer operations
for (auto&& pass : simul_passes)
// enqueue read/write and command buffer operations, serialized
// by in-order queue
for (auto&& pass : enqueue_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error_release_stdout(error, "EnqueueSimultaneousPass failed");
error = EnqueuePass(pass);
test_error_release_stdout(error, "EnqueuePass failed");
}
// execute command buffers
error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
test_error_release_stdout(error, "clSetUserEventStatus failed");
// flush streams
fflush(stdout);
@@ -477,13 +331,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
for (int i = 0; i < total_pattern_coverage; i++)
counters_map[sstr.str().at(i)]++;
if (counters_map.size() != simul_passes.size())
if (counters_map.size() != enqueue_passes.size())
{
log_error("printout inconsistent with input data\n");
return TEST_FAIL;
}
for (auto&& pass : simul_passes)
for (auto&& pass : enqueue_passes)
{
auto& res_data = pass.output_buffer;
@@ -501,18 +355,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
}
}
std::remove(temp_filename.c_str());
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
clEventWrapper trigger_event = nullptr;
clEventWrapper wait_event = nullptr;
std::string temp_filename;
int file_descriptor;
bool printf_use_support;
// specifies max test length for printf pattern
const unsigned max_pattern_length = 6;
// specifies min test length for printf pattern
@@ -523,14 +372,8 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
} // anonymous namespace
REGISTER_TEST(basic_printf)
REGISTER_TEST(printf)
{
return MakeAndRunTest<CommandBufferPrintfTest<false>>(device, context,
queue, num_elements);
}
REGISTER_TEST(simultaneous_printf)
{
return MakeAndRunTest<CommandBufferPrintfTest<true>>(device, context, queue,
num_elements);
return MakeAndRunTest<CommandBufferPrintfTest>(device, context, queue,
num_elements);
}

View File

@@ -86,21 +86,17 @@ cl_int VerifyResult(const clEventWrapper& event)
}
////////////////////////////////////////////////////////////////////////////////
// Command-buffer profiling test cases:
// -all commands are recorded to a single command-queue
// -profiling a command-buffer with simultaneous use
template <bool simultaneous_request>
// Command-buffer profiling test for enqueuing command-buffer twice and checking
// the profiling counters on the events returned.
struct CommandBufferProfiling : public BasicCommandBufferTest
{
CommandBufferProfiling(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), wait_event(nullptr)
: BasicCommandBufferTest(device, context, queue)
{
simultaneous_use_requested = simultaneous_request;
if (simultaneous_request) buffer_size_multiplier = 2;
buffer_size_multiplier = 2; // Do two enqueues of command-buffer
}
//--------------------------------------------------------------------------
bool Skip() override
{
if (BasicCommandBufferTest::Skip()) return true;
@@ -127,10 +123,9 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
"Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
return true;
}
return (simultaneous_use_requested && !simultaneous_use_support);
return false;
}
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override
{
@@ -156,37 +151,45 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
return BasicCommandBufferTest::SetUp(elements);
}
//--------------------------------------------------------------------------
struct EnqueuePassData
{
cl_int offset;
clEventWrapper query_event;
};
cl_int Run() override
{
cl_int error = CL_SUCCESS;
// record command buffer
error = RecordCommandBuffer();
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
if (simultaneous_use_requested)
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<EnqueuePassData> enqueue_passes = {
{ 0, clEventWrapper() }, { offset, clEventWrapper() }
};
// In-order queue serialized the command-buffer submissions
for (auto&& pass : enqueue_passes)
{
// enqueue simultaneous command-buffers with profiling command queue
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
error = EnqueuePass(pass);
test_error(error, "EnqueueSerializedPass failed");
}
else
error = clFinish(queue);
test_error(error, "clFinish failed");
for (auto&& pass : enqueue_passes)
{
// enqueue single command-buffer with profiling command queue
error = RunSingle();
test_error(error, "RunSingle failed");
error = VerifyResult(pass.query_event);
test_error(error, "VerifyResult failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
{
cl_int error = CL_SUCCESS;
error = clCommandNDRangeKernelKHR(
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -196,41 +199,7 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error = CL_SUCCESS;
std::vector<cl_int> output_data(num_elements);
error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
clEventWrapper query_event;
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, &query_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
error = VerifyResult(query_event);
test_error(error, "VerifyResult failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
{
cl_int offset;
std::vector<cl_int> output_buffer;
clEventWrapper query_event;
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
cl_int EnqueuePass(EnqueuePassData& pd)
{
cl_int error = clEnqueueFillBuffer(
queue, out_mem, &pattern, sizeof(cl_int),
@@ -241,59 +210,13 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
0, sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
if (!wait_event)
{
wait_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&wait_event, &pd.query_event);
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, &pd.query_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
{
cl_int error = CL_SUCCESS;
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ 0, std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueueSimultaneousPass failed");
}
error = clSetUserEventStatus(wait_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
for (auto&& pass : simul_passes)
{
error = VerifyResult(pass.query_event);
test_error(error, "VerifyResult failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
clEventWrapper wait_event;
const cl_int pattern = 0xA;
};
@@ -356,19 +279,13 @@ struct CommandBufferSubstituteQueueProfiling : public BasicCommandBufferTest
};
} // anonymous namespace
REGISTER_TEST(basic_profiling)
REGISTER_TEST(profiling)
{
return MakeAndRunTest<CommandBufferProfiling<false>>(device, context, queue,
num_elements);
return MakeAndRunTest<CommandBufferProfiling>(device, context, queue,
num_elements);
}
REGISTER_TEST(simultaneous_profiling)
{
return MakeAndRunTest<CommandBufferProfiling<true>>(device, context, queue,
num_elements);
}
REGISTER_TEST(substitute_queue_profiling)
REGISTER_TEST(profiling_substitute_queue)
{
return MakeAndRunTest<CommandBufferSubstituteQueueProfiling>(
device, context, queue, num_elements);

View File

@@ -23,21 +23,16 @@ namespace {
// Command-queue substitution tests which handles below cases:
// -substitution on queue without properties
// -substitution on queue with properties
// -simultaneous use queue substitution
template <bool prop_use, bool simul_use>
template <bool prop_use>
struct SubstituteQueueTest : public BasicCommandBufferTest
{
SubstituteQueueTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue),
properties_use_requested(prop_use), user_event(nullptr)
{
simultaneous_use_requested = simul_use;
if (simul_use) buffer_size_multiplier = 2;
}
properties_use_requested(prop_use)
{}
//--------------------------------------------------------------------------
bool Skip() override
{
if (properties_use_requested)
@@ -57,11 +52,9 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return true;
}
return BasicCommandBufferTest::Skip()
|| (simultaneous_use_requested && !simultaneous_use_support);
return BasicCommandBufferTest::Skip();
}
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override
{
// By default command queue is created without properties,
@@ -81,7 +74,6 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return BasicCommandBufferTest::SetUp(elements);
}
//--------------------------------------------------------------------------
cl_int Run() override
{
// record command buffer with primary queue
@@ -106,23 +98,14 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
test_error(error, "clCreateCommandQueue failed");
}
if (simultaneous_use_support)
{
// enque simultaneous command-buffers with substitute queue
error = RunSimultaneous(new_queue);
test_error(error, "RunSimultaneous failed");
}
else
{
// enque single command-buffer with substitute queue
error = RunSingle(new_queue);
test_error(error, "RunSingle failed");
}
// enqueue single command-buffer with substitute queue
error = RunSingle(new_queue);
test_error(error, "RunSingle failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
{
cl_int error = clCommandNDRangeKernelKHR(
@@ -135,14 +118,13 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle(const cl_command_queue& q)
{
cl_int error = CL_SUCCESS;
std::vector<cl_int> output_data(num_elements);
error = clEnqueueFillBuffer(q, in_mem, &pattern_pri, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
cl_int error =
clEnqueueFillBuffer(q, in_mem, &pattern_pri, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
cl_command_queue queues[] = { q };
@@ -165,90 +147,8 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
{
cl_int pattern;
cl_int offset;
cl_command_queue queue;
std::vector<cl_int> output_buffer;
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{
cl_int error = clEnqueueFillBuffer(
pd.queue, in_mem, &pd.pattern, sizeof(cl_int),
pd.offset * sizeof(cl_int), data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error =
clEnqueueFillBuffer(pd.queue, off_mem, &pd.offset, sizeof(cl_int),
0, sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
if (!user_event)
{
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
cl_command_queue queues[] = { pd.queue };
error = clEnqueueCommandBufferKHR(1, queues, command_buffer, 1,
&user_event, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
pd.queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int),
data_size(), pd.output_buffer.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous(const cl_command_queue& q)
{
cl_int error = CL_SUCCESS;
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ pattern_pri, 0, q, std::vector<cl_int>(num_elements) },
{ pattern_sec, offset, q, std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueuePass failed");
}
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
for (auto&& pass : simul_passes)
{
error = clFinish(pass.queue);
test_error(error, "clFinish failed");
auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pass.pattern, res_data[i], i);
}
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
const cl_int pattern_pri = 0xB;
const cl_int pattern_sec = 0xC;
bool properties_use_requested;
clEventWrapper user_event;
};
// Command-queue substitution tests which handles below cases:
@@ -397,20 +297,14 @@ struct QueueOrderTest : public BasicCommandBufferTest
REGISTER_TEST(queue_substitution)
{
return MakeAndRunTest<SubstituteQueueTest<false, false>>(
device, context, queue, num_elements);
return MakeAndRunTest<SubstituteQueueTest<false>>(device, context, queue,
num_elements);
}
REGISTER_TEST(properties_queue_substitution)
REGISTER_TEST(queue_substitution_properties)
{
return MakeAndRunTest<SubstituteQueueTest<true, false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(simultaneous_queue_substitution)
{
return MakeAndRunTest<SubstituteQueueTest<false, true>>(
device, context, queue, num_elements);
return MakeAndRunTest<SubstituteQueueTest<true>>(device, context, queue,
num_elements);
}
REGISTER_TEST(queue_substitute_in_order)

View File

@@ -22,25 +22,22 @@ namespace {
////////////////////////////////////////////////////////////////////////////////
// clSetKernelArg tests for cl_khr_command_buffer which handles below cases:
// -test interactions of clSetKernelArg with command-buffers
// -test interactions of clSetKernelArg on a command-buffer pending execution
// -test interactions of clSetKernelArg after command-buffer finalize but
// before enqueue
// -test interactions of clSetKernelArg between command-buffer enqueue
template <bool simul_use>
template <bool enqueue_test>
struct CommandBufferSetKernelArg : public BasicCommandBufferTest
{
CommandBufferSetKernelArg(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), trigger_event(nullptr)
: BasicCommandBufferTest(device, context, queue)
{
simultaneous_use_requested = simul_use;
if (simul_use) buffer_size_multiplier = 2;
if (enqueue_test) buffer_size_multiplier = 2;
}
//--------------------------------------------------------------------------
cl_int SetUpKernel() override
{
cl_int error = CL_SUCCESS;
const char* kernel_str =
R"(
__kernel void copy(int in, __global int* out, __global int* offset)
@@ -50,8 +47,8 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
out[ind] = in;
})";
error = create_single_kernel_helper_create_program(context, &program, 1,
&kernel_str);
cl_int error = create_single_kernel_helper_create_program(
context, &program, 1, &kernel_str);
test_error(error, "Failed to create program with source");
error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
@@ -63,7 +60,6 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int SetUpKernelArgs() override
{
cl_int error = CL_SUCCESS;
@@ -99,15 +95,14 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int Run() override
{
cl_int error = CL_SUCCESS;
if (simultaneous_use_requested)
if (enqueue_test)
{
// enqueue simultaneous command-buffers with clSetKernelArg calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
// enqueue command-buffers with clSetKernelArg calls in between
error = RunMultipleEnqueue();
test_error(error, "RunMultipleEnqueue failed");
}
else
{
@@ -119,12 +114,9 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
{
cl_int error = CL_SUCCESS;
error = clCommandNDRangeKernelKHR(
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -148,14 +140,12 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error = CL_SUCCESS;
std::vector<cl_int> output_data(num_elements);
// record command buffer
error = RecordCommandBuffer();
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
const cl_int pattern_base = 0;
@@ -187,20 +177,16 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
struct EnqueuePassData
{
cl_int pattern;
cl_int offset;
std::vector<cl_int> output_buffer;
};
//--------------------------------------------------------------------------
cl_int RecordSimultaneousCommandBuffer() const
cl_int RecordEnqueueCommandBuffer() const
{
cl_int error = CL_SUCCESS;
error = clCommandNDRangeKernelKHR(
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -210,8 +196,7 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
cl_int EnqueuePass(EnqueuePassData& pd)
{
cl_int error = clEnqueueFillBuffer(
queue, out_mem, &pd.pattern, sizeof(cl_int),
@@ -222,14 +207,8 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
0, sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
if (!trigger_event)
{
trigger_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&trigger_event, nullptr);
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
@@ -240,49 +219,39 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
cl_int RunMultipleEnqueue()
{
cl_int error = CL_SUCCESS;
// record command buffer with primary queue
error = RecordSimultaneousCommandBuffer();
test_error(error, "RecordSimultaneousCommandBuffer failed");
cl_int error = RecordEnqueueCommandBuffer();
test_error(error, "RecordEnqueeuCommandBuffer failed");
std::vector<SimulPassData> simul_passes = {
{ 0, 0, std::vector<cl_int>(num_elements) }
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<EnqueuePassData> enqueue_passes = {
{ 0, 0, std::vector<cl_int>(num_elements) },
{ 1, offset, std::vector<cl_int>(num_elements) }
};
error = EnqueueSimultaneousPass(simul_passes.front());
test_error(error, "EnqueueSimultaneousPass 1 failed");
// changing kernel args at this point should have no effect,
// test will verify if clSetKernelArg didn't affect command-buffer
cl_int in_arg = pattern_sec;
error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 1, sizeof(out_mem_k2), &out_mem_k2);
test_error(error, "clSetKernelArg failed");
if (simultaneous_use_support)
for (auto&& pass : enqueue_passes)
{
cl_int offset = static_cast<cl_int>(num_elements);
simul_passes.push_back(
{ 1, offset, std::vector<cl_int>(num_elements) });
// changing kernel args at this point should have no effect,
// test will verify if clSetKernelArg didn't affect command-buffer
cl_int in_arg = pattern_sec;
error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
test_error(error, "clSetKernelArg failed");
error = EnqueueSimultaneousPass(simul_passes.back());
test_error(error, "EnqueueSimultaneousPass 2 failed");
error = clSetKernelArg(kernel, 1, sizeof(out_mem_k2), &out_mem_k2);
test_error(error, "clSetKernelArg failed");
error = EnqueuePass(pass);
test_error(error, "EnqueuePass failed");
}
error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
// verify the result buffer
for (auto&& pass : simul_passes)
for (auto&& pass : enqueue_passes)
{
auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++)
@@ -294,9 +263,6 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
clEventWrapper trigger_event = nullptr;
const cl_int pattern_pri = 2;
const cl_int pattern_sec = 3;
@@ -305,13 +271,13 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
} // anonymous namespace
REGISTER_TEST(basic_set_kernel_arg)
REGISTER_TEST(set_kernel_arg_after_finalize)
{
return MakeAndRunTest<CommandBufferSetKernelArg<false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(pending_set_kernel_arg)
REGISTER_TEST(set_kernel_arg_after_enqueue)
{
return MakeAndRunTest<CommandBufferSetKernelArg<true>>(device, context,
queue, num_elements);

View File

@@ -131,9 +131,10 @@ struct CreateCommandBufferRepeatedProperties : public BasicCommandBufferTest
if (BasicCommandBufferTest::Skip()) return true;
bool skip = true;
if (simultaneous_use_support)
if (is_extension_available(
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
{
rep_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
rep_prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
skip = false;
}
else if (is_extension_available(
@@ -142,13 +143,6 @@ struct CreateCommandBufferRepeatedProperties : public BasicCommandBufferTest
rep_prop = CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
skip = false;
}
else if (is_extension_available(
device,
CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
{
rep_prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
skip = false;
}
return skip;
}
@@ -185,7 +179,9 @@ struct CreateCommandBufferNotSupportedProperties : public BasicCommandBufferTest
if (BasicCommandBufferTest::Skip()) return true;
bool skip = true;
if (!simultaneous_use_support)
if (is_extension_available(
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME)
&& !simultaneous_use_support)
{
unsupported_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
skip = false;

View File

@@ -66,102 +66,6 @@ struct EnqueueCommandBufferNotFinalized : public BasicCommandBufferTest
}
};
// CL_INVALID_OPERATION if command_buffer was not created with the
// CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR flag and is in the Pending state.
struct EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState
: public BasicCommandBufferTest
{
EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState(
cl_device_id device, cl_context context, cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), user_event(nullptr)
{}
cl_int Run() override
{
cl_int error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_failure_error_ret(error, CL_INVALID_OPERATION,
"clEnqueueCommandBufferKHR should return "
"CL_INVALID_OPERATION",
TEST_FAIL);
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
clFinish(queue);
return CL_SUCCESS;
}
cl_int SetUp(int elements) override
{
auto verify_state = [&](const cl_command_buffer_state_khr &expected) {
cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0);
cl_int error = clGetCommandBufferInfoKHR(
command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state),
&state, nullptr);
test_error_ret(error, "clGetCommandBufferInfoKHR failed",
TEST_FAIL);
test_assert_error(
state == expected,
"Unexpected result of CL_COMMAND_BUFFER_STATE_KHR query!");
return TEST_PASS;
};
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp failed");
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
test_error(error, "clCreateCommandBufferKHR failed");
error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
test_error(error, "State is not Executable");
error = EnqueueCommandBuffer();
test_error(error, "EnqueueCommandBuffer failed");
return CL_SUCCESS;
}
cl_int RecordCommandBuffer()
{
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
cl_int EnqueueCommandBuffer()
{
cl_int pattern = 0xE;
cl_int error =
clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&user_event, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
return CL_SUCCESS;
}
clEventWrapper user_event;
};
// CL_INVALID_VALUE if queues is NULL and num_queues is > 0, or queues is not
// NULL and num_queues is 0.
struct EnqueueCommandBufferNullQueuesNumQueues : public BasicCommandBufferTest
@@ -623,14 +527,6 @@ REGISTER_TEST(negative_enqueue_command_buffer_not_finalized)
device, context, queue, num_elements);
}
REGISTER_TEST(
negative_enqueue_command_buffer_without_simultaneous_no_pending_state)
{
return MakeAndRunTest<
EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState>(
device, context, queue, num_elements);
}
REGISTER_TEST(negative_enqueue_command_buffer_null_queues_num_queues)
{
return MakeAndRunTest<EnqueueCommandBufferNullQueuesNumQueues>(

View File

@@ -44,20 +44,9 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
FinalizeCommandBufferNotRecordingState(cl_device_id device,
cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), user_event(nullptr)
: BasicCommandBufferTest(device, context, queue)
{}
cl_int SetUp(int elements) override
{
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp failed");
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
return CL_SUCCESS;
}
cl_int Run() override
{
auto verify_state = [&](const cl_command_buffer_state_khr &expected) {
@@ -87,18 +76,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
"CL_INVALID_OPERATION",
TEST_FAIL);
error = EnqueueCommandBuffer();
test_error(error, "EnqueueCommandBuffer failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_failure_error_ret(error, CL_INVALID_OPERATION,
"clFinalizeCommandBufferKHR should return "
"CL_INVALID_OPERATION",
TEST_FAIL);
clSetUserEventStatus(user_event, CL_COMPLETE);
clFinish(queue);
return CL_SUCCESS;
}
@@ -114,22 +91,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
return CL_SUCCESS;
}
cl_int EnqueueCommandBuffer()
{
cl_int pattern = 0xE;
cl_int error =
clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&user_event, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
return CL_SUCCESS;
}
clEventWrapper user_event;
};
};