From 590321d98d0146591ad15fc94b2344d95e560912 Mon Sep 17 00:00:00 2001 From: ouakheli <53617630+ouakheli@users.noreply.github.com> Date: Sun, 30 Aug 2020 13:51:38 +0100 Subject: [PATCH] Add test for CL_PROFILING_COMMAND_COMPLETE (#841) The test checks profiling value CL_PROFILING_COMMAND_COMPLETE compared to CL_PROFILING_COMMAND_END for two cases: 1) kernel with children with different levels of nesting 2) kernel without children --- .../device_execution/CMakeLists.txt | 1 + .../device_execution/enqueue_profiling.cpp | 171 ++++++++++++++++++ test_conformance/device_execution/main.cpp | 17 +- test_conformance/device_execution/procs.h | 2 + 4 files changed, 180 insertions(+), 11 deletions(-) create mode 100644 test_conformance/device_execution/enqueue_profiling.cpp diff --git a/test_conformance/device_execution/CMakeLists.txt b/test_conformance/device_execution/CMakeLists.txt index d99ba217..5e9e30e3 100644 --- a/test_conformance/device_execution/CMakeLists.txt +++ b/test_conformance/device_execution/CMakeLists.txt @@ -8,6 +8,7 @@ set(DEVICE_EXECUTION_SOURCES enqueue_multi_queue.cpp enqueue_ndrange.cpp enqueue_wg_size.cpp + enqueue_profiling.cpp execute_block.cpp host_multi_queue.cpp host_queue_order.cpp diff --git a/test_conformance/device_execution/enqueue_profiling.cpp b/test_conformance/device_execution/enqueue_profiling.cpp new file mode 100644 index 00000000..75aeb4ec --- /dev/null +++ b/test_conformance/device_execution/enqueue_profiling.cpp @@ -0,0 +1,171 @@ +// +// Copyright (c) 2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include "harness/testHarness.h" +#include "harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" +#include + +static int max_nestingLevel = 10; + +static const char* enqueue_multi_level = R"( + void block_fn(__global int* res, int level) + { + queue_t def_q = get_default_queue(); + if(--level < 0) return; + void (^kernelBlock)(void) = ^{ block_fn(res, level); }; + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if(enq_res != CLK_SUCCESS) { (*res) = -1; return; } + else if(*res != -1) { (*res)++; } + } + kernel void enqueue_multi_level(__global int* res, int level) + { + *res = 0; + block_fn(res, level); + })"; + +int test_enqueue_profiling(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_int err_ret, res = 0; + clCommandQueueWrapper dev_queue; + clCommandQueueWrapper host_queue; + + cl_uint maxQueueSize = 0; + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, + sizeof(maxQueueSize), &maxQueueSize, 0); + test_error(err_ret, + "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); + + cl_queue_properties dev_queue_prop_def[] = { + CL_QUEUE_PROPERTIES, + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE + | CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_PROFILING_ENABLE, + CL_QUEUE_SIZE, maxQueueSize, 0 + }; + + dev_queue = clCreateCommandQueueWithProperties( + context, device, dev_queue_prop_def, &err_ret); + test_error(err_ret, + "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_" + "DEFAULT) failed"); + + cl_queue_properties host_queue_prop_def[] = { CL_QUEUE_PROPERTIES, + CL_QUEUE_PROFILING_ENABLE, + 0 }; + + host_queue = clCreateCommandQueueWithProperties( + context, device, host_queue_prop_def, &err_ret); + test_error(err_ret, + "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_" + "DEFAULT) failed"); + + cl_int status; + size_t size = 1; + cl_int result = 0; + + clMemWrapper res_mem; + clProgramWrapper program; + clKernelWrapper kernel; + + cl_event kernel_event; + + err_ret = create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &enqueue_multi_level, + "enqueue_multi_level", "-cl-std=CL2.0"); + if (check_error(err_ret, "Create single kernel failed")) return -1; + + res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + sizeof(result), &result, &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + + err_ret = clSetKernelArg(kernel, 0, sizeof(res_mem), &res_mem); + test_error(err_ret, "clSetKernelArg(0) failed"); + + for (int level = 0; level < max_nestingLevel; level++) + { + err_ret = clSetKernelArg(kernel, 1, sizeof(level), &level); + test_error(err_ret, "clSetKernelArg(1) failed"); + + err_ret = clEnqueueNDRangeKernel(host_queue, kernel, 1, NULL, &size, + &size, 0, NULL, &kernel_event); + test_error(err_ret, + "clEnqueueNDRangeKernel('enqueue_multi_level') failed"); + + err_ret = clEnqueueReadBuffer(host_queue, res_mem, CL_TRUE, 0, + sizeof(result), &result, 0, NULL, NULL); + test_error(err_ret, "clEnqueueReadBuffer() failed"); + + if (result != level) + { + log_error("Kernel execution should return the maximum nesting " + " level (got %d instead of %d)", + result, level); + return -1; + } + + err_ret = + clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(err_ret, "clGetEventInfo() failed"); + + if (check_error(status, "Kernel execution status %d", status)) + return status; + + cl_ulong end; + err_ret = clGetEventProfilingInfo( + kernel_event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL); + test_error(err_ret, "clGetEventProfilingInfo() failed"); + + cl_ulong complete; + err_ret = + clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_COMPLETE, + sizeof(complete), &complete, NULL); + test_error(err_ret, "clGetEventProfilingInfo() failed"); + + if (level == 0) + { + if (end != complete) + { + log_error("Profiling END should be the same as COMPLETE for " + "kernels without children"); + return -1; + } + } + else + { + if (end > complete) + { + log_error("Profiling END should be smaller than COMPLETE for " + "kernels with device side children"); + return -1; + } + } + + log_info("Profiling info for '%s' kernel is OK for level %d.\n", + "enqueue_multi_level", level); + + clReleaseEvent(kernel_event); + } + + return res; +} diff --git a/test_conformance/device_execution/main.cpp b/test_conformance/device_execution/main.cpp index 8fc0b6c4..a3d0d8d0 100644 --- a/test_conformance/device_execution/main.cpp +++ b/test_conformance/device_execution/main.cpp @@ -58,17 +58,12 @@ test_status InitCL(cl_device_id device) { } test_definition test_list[] = { - ADD_TEST( device_info ), - ADD_TEST( device_queue ), - ADD_TEST( execute_block ), - ADD_TEST( enqueue_block ), - ADD_TEST( enqueue_nested_blocks ), - ADD_TEST( enqueue_wg_size ), - ADD_TEST( enqueue_flags ), - ADD_TEST( enqueue_multi_queue ), - ADD_TEST( host_multi_queue ), - ADD_TEST( enqueue_ndrange ), - ADD_TEST( host_queue_order ), + ADD_TEST(device_info), ADD_TEST(device_queue), + ADD_TEST(execute_block), ADD_TEST(enqueue_block), + ADD_TEST(enqueue_nested_blocks), ADD_TEST(enqueue_wg_size), + ADD_TEST(enqueue_flags), ADD_TEST(enqueue_multi_queue), + ADD_TEST(host_multi_queue), ADD_TEST(enqueue_ndrange), + ADD_TEST(host_queue_order), ADD_TEST(enqueue_profiling), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/device_execution/procs.h b/test_conformance/device_execution/procs.h index 8f668ed1..087dafc1 100644 --- a/test_conformance/device_execution/procs.h +++ b/test_conformance/device_execution/procs.h @@ -26,6 +26,8 @@ extern int test_enqueue_multi_queue(cl_device_id device, cl_context context, cl_ extern int test_host_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_host_queue_order(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_profiling(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); extern int test_execution_stress(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);