mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Add test for CL_PROFILING_COMMAND_COMPLETE (#841)
The test checks profiling value CL_PROFILING_COMMAND_COMPLETE compared to CL_PROFILING_COMMAND_END for two cases: 1) kernel with children with different levels of nesting 2) kernel without children
This commit is contained in:
@@ -8,6 +8,7 @@ set(DEVICE_EXECUTION_SOURCES
|
||||
enqueue_multi_queue.cpp
|
||||
enqueue_ndrange.cpp
|
||||
enqueue_wg_size.cpp
|
||||
enqueue_profiling.cpp
|
||||
execute_block.cpp
|
||||
host_multi_queue.cpp
|
||||
host_queue_order.cpp
|
||||
|
||||
171
test_conformance/device_execution/enqueue_profiling.cpp
Normal file
171
test_conformance/device_execution/enqueue_profiling.cpp
Normal file
@@ -0,0 +1,171 @@
|
||||
//
|
||||
// Copyright (c) 2020 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
#include "utils.h"
|
||||
#include <time.h>
|
||||
|
||||
static int max_nestingLevel = 10;
|
||||
|
||||
static const char* enqueue_multi_level = R"(
|
||||
void block_fn(__global int* res, int level)
|
||||
{
|
||||
queue_t def_q = get_default_queue();
|
||||
if(--level < 0) return;
|
||||
void (^kernelBlock)(void) = ^{ block_fn(res, level); };
|
||||
ndrange_t ndrange = ndrange_1D(1);
|
||||
int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
|
||||
if(enq_res != CLK_SUCCESS) { (*res) = -1; return; }
|
||||
else if(*res != -1) { (*res)++; }
|
||||
}
|
||||
kernel void enqueue_multi_level(__global int* res, int level)
|
||||
{
|
||||
*res = 0;
|
||||
block_fn(res, level);
|
||||
})";
|
||||
|
||||
int test_enqueue_profiling(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_int err_ret, res = 0;
|
||||
clCommandQueueWrapper dev_queue;
|
||||
clCommandQueueWrapper host_queue;
|
||||
|
||||
cl_uint maxQueueSize = 0;
|
||||
err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
|
||||
sizeof(maxQueueSize), &maxQueueSize, 0);
|
||||
test_error(err_ret,
|
||||
"clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
|
||||
|
||||
cl_queue_properties dev_queue_prop_def[] = {
|
||||
CL_QUEUE_PROPERTIES,
|
||||
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE
|
||||
| CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_PROFILING_ENABLE,
|
||||
CL_QUEUE_SIZE, maxQueueSize, 0
|
||||
};
|
||||
|
||||
dev_queue = clCreateCommandQueueWithProperties(
|
||||
context, device, dev_queue_prop_def, &err_ret);
|
||||
test_error(err_ret,
|
||||
"clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_"
|
||||
"DEFAULT) failed");
|
||||
|
||||
cl_queue_properties host_queue_prop_def[] = { CL_QUEUE_PROPERTIES,
|
||||
CL_QUEUE_PROFILING_ENABLE,
|
||||
0 };
|
||||
|
||||
host_queue = clCreateCommandQueueWithProperties(
|
||||
context, device, host_queue_prop_def, &err_ret);
|
||||
test_error(err_ret,
|
||||
"clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_"
|
||||
"DEFAULT) failed");
|
||||
|
||||
cl_int status;
|
||||
size_t size = 1;
|
||||
cl_int result = 0;
|
||||
|
||||
clMemWrapper res_mem;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
|
||||
cl_event kernel_event;
|
||||
|
||||
err_ret = create_single_kernel_helper_with_build_options(
|
||||
context, &program, &kernel, 1, &enqueue_multi_level,
|
||||
"enqueue_multi_level", "-cl-std=CL2.0");
|
||||
if (check_error(err_ret, "Create single kernel failed")) return -1;
|
||||
|
||||
res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
sizeof(result), &result, &err_ret);
|
||||
test_error(err_ret, "clCreateBuffer() failed");
|
||||
|
||||
err_ret = clSetKernelArg(kernel, 0, sizeof(res_mem), &res_mem);
|
||||
test_error(err_ret, "clSetKernelArg(0) failed");
|
||||
|
||||
for (int level = 0; level < max_nestingLevel; level++)
|
||||
{
|
||||
err_ret = clSetKernelArg(kernel, 1, sizeof(level), &level);
|
||||
test_error(err_ret, "clSetKernelArg(1) failed");
|
||||
|
||||
err_ret = clEnqueueNDRangeKernel(host_queue, kernel, 1, NULL, &size,
|
||||
&size, 0, NULL, &kernel_event);
|
||||
test_error(err_ret,
|
||||
"clEnqueueNDRangeKernel('enqueue_multi_level') failed");
|
||||
|
||||
err_ret = clEnqueueReadBuffer(host_queue, res_mem, CL_TRUE, 0,
|
||||
sizeof(result), &result, 0, NULL, NULL);
|
||||
test_error(err_ret, "clEnqueueReadBuffer() failed");
|
||||
|
||||
if (result != level)
|
||||
{
|
||||
log_error("Kernel execution should return the maximum nesting "
|
||||
" level (got %d instead of %d)",
|
||||
result, level);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err_ret =
|
||||
clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(status), &status, NULL);
|
||||
test_error(err_ret, "clGetEventInfo() failed");
|
||||
|
||||
if (check_error(status, "Kernel execution status %d", status))
|
||||
return status;
|
||||
|
||||
cl_ulong end;
|
||||
err_ret = clGetEventProfilingInfo(
|
||||
kernel_event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL);
|
||||
test_error(err_ret, "clGetEventProfilingInfo() failed");
|
||||
|
||||
cl_ulong complete;
|
||||
err_ret =
|
||||
clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_COMPLETE,
|
||||
sizeof(complete), &complete, NULL);
|
||||
test_error(err_ret, "clGetEventProfilingInfo() failed");
|
||||
|
||||
if (level == 0)
|
||||
{
|
||||
if (end != complete)
|
||||
{
|
||||
log_error("Profiling END should be the same as COMPLETE for "
|
||||
"kernels without children");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (end > complete)
|
||||
{
|
||||
log_error("Profiling END should be smaller than COMPLETE for "
|
||||
"kernels with device side children");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("Profiling info for '%s' kernel is OK for level %d.\n",
|
||||
"enqueue_multi_level", level);
|
||||
|
||||
clReleaseEvent(kernel_event);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
@@ -58,17 +58,12 @@ test_status InitCL(cl_device_id device) {
|
||||
}
|
||||
|
||||
test_definition test_list[] = {
|
||||
ADD_TEST( device_info ),
|
||||
ADD_TEST( device_queue ),
|
||||
ADD_TEST( execute_block ),
|
||||
ADD_TEST( enqueue_block ),
|
||||
ADD_TEST( enqueue_nested_blocks ),
|
||||
ADD_TEST( enqueue_wg_size ),
|
||||
ADD_TEST( enqueue_flags ),
|
||||
ADD_TEST( enqueue_multi_queue ),
|
||||
ADD_TEST( host_multi_queue ),
|
||||
ADD_TEST( enqueue_ndrange ),
|
||||
ADD_TEST( host_queue_order ),
|
||||
ADD_TEST(device_info), ADD_TEST(device_queue),
|
||||
ADD_TEST(execute_block), ADD_TEST(enqueue_block),
|
||||
ADD_TEST(enqueue_nested_blocks), ADD_TEST(enqueue_wg_size),
|
||||
ADD_TEST(enqueue_flags), ADD_TEST(enqueue_multi_queue),
|
||||
ADD_TEST(host_multi_queue), ADD_TEST(enqueue_ndrange),
|
||||
ADD_TEST(host_queue_order), ADD_TEST(enqueue_profiling),
|
||||
};
|
||||
|
||||
const int test_num = ARRAY_SIZE( test_list );
|
||||
|
||||
@@ -26,6 +26,8 @@ extern int test_enqueue_multi_queue(cl_device_id device, cl_context context, cl_
|
||||
extern int test_host_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_host_queue_order(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_enqueue_profiling(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_execution_stress(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user