Merge branch 'main' into cl_khr_unified_svm

This commit is contained in:
Ben Ashbaugh
2025-11-06 14:34:30 -08:00
63 changed files with 4934 additions and 2745 deletions

View File

@@ -25,11 +25,18 @@ if(USE_CL_EXPERIMENTAL)
add_definitions(-DCL_EXPERIMENTAL)
endif(USE_CL_EXPERIMENTAL)
# Optionally instrument the whole build with ThreadSanitizer (TSan) to
# detect data races at runtime; both compile and link steps need the flag.
option(SANITIZER_THREAD "Build with the thread sanitiser" OFF)
if (SANITIZER_THREAD)
    add_compile_options(-fsanitize=thread)
    add_link_options(-fsanitize=thread)
endif (SANITIZER_THREAD)
#-----------------------------------------------------------
# Default Configurable Test Set
#-----------------------------------------------------------
option(D3D10_IS_SUPPORTED "Run DirectX 10 interop tests" OFF)
option(D3D11_IS_SUPPORTED "Run DirectX 11 interop tests" OFF)
option(D3D12_IS_SUPPORTED "Run DirectX 12 interop tests" OFF)
option(GL_IS_SUPPORTED "Run OpenGL interop tests" OFF)
option(GLES_IS_SUPPORTED "Run OpenGL ES interop tests" OFF)
option(VULKAN_IS_SUPPORTED "Run Vulkan interop tests" OFF)

View File

@@ -137,10 +137,10 @@ uint32_t get_channel_order_channel_count(cl_channel_order order)
case CL_RGx: return 2;
case CL_RGB:
case CL_RGBx:
case CL_sRGB:
case CL_sRGBx: return 3;
case CL_sRGB: return 3;
case CL_RGBx:
case CL_sRGBx:
case CL_RGBA:
case CL_ARGB:
case CL_BGRA:

View File

@@ -39,4 +39,20 @@ inline std::string str_sprintf(const std::string &str, Args... args)
return std::string(buffer.get(), buffer.get() + s - 1);
}
// Returns the argument, converted to std::string.
// The return type of std::filesystem::path::u8string() was
// std::string in C++17, but became std::u8string in C++20.
// Use this method to wrap the result when a std::string
// is desired.
//
// A generic template plus a non-template overload for std::string is used:
// overload resolution prefers the exact non-template overload, so the
// std::string case is a pass-through, while the generic template handles
// std::u8string (and any other character container) when it exists and is
// used.
template <typename STRING_TYPE>
inline std::string to_string(const STRING_TYPE &str)
{
    // Element-wise conversion; relies on the element type being implicitly
    // convertible to char (true for char8_t/char16_t code units).
    return std::string(str.begin(), str.end());
}
inline std::string to_string(const std::string &str) { return str; }
#endif // STRING_HELPERS_H

View File

@@ -56,6 +56,9 @@ if(VULKAN_IS_SUPPORTED)
add_subdirectory( common/vulkan_wrapper )
add_subdirectory( vulkan )
endif()
# The DirectX wrapper library is only required when the D3D12 interop
# tests are part of the build.
if(D3D12_IS_SUPPORTED)
    add_subdirectory(common/directx_wrapper)
endif ()
file(GLOB CSV_FILES "opencl_conformance_tests_*.csv")

View File

@@ -15,142 +15,167 @@
//
#include "common.h"
typedef struct {
cl_int *pA;
cl_int *pB;
cl_int *pC;
typedef struct
{
cl_int* pA;
cl_int* pB;
cl_int* pC;
} BufPtrs;
const char *set_kernel_exec_info_svm_ptrs_kernel[] = {
"struct BufPtrs;\n"
"\n"
"typedef struct {\n"
" __global int *pA;\n"
" __global int *pB;\n"
" __global int *pC;\n"
"} BufPtrs;\n"
"\n"
"__kernel void set_kernel_exec_info_test(__global BufPtrs* pBufs)\n"
"{\n"
" size_t i;\n"
" i = get_global_id(0);\n"
" pBufs->pA[i]++;\n"
" pBufs->pB[i]++;\n"
" pBufs->pC[i]++;\n"
"}\n"
const char* set_kernel_exec_info_svm_ptrs_kernel[] = {
"struct BufPtrs;\n"
"\n"
"typedef struct {\n"
" __global int *pA;\n"
" __global int *pB;\n"
" __global int *pC;\n"
"} BufPtrs;\n"
"\n"
"__kernel void set_kernel_exec_info_test(__global BufPtrs* pBufs)\n"
"{\n"
" size_t i;\n"
" i = get_global_id(0);\n"
" pBufs->pA[i]++;\n"
" pBufs->pB[i]++;\n"
" pBufs->pC[i]++;\n"
"}\n"
};
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
// Test that clSetKernelExecInfo works correctly with
// CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
//
REGISTER_TEST(svm_set_kernel_exec_info_svm_ptrs)
{
clContextWrapper c = NULL;
clProgramWrapper program = NULL;
cl_uint num_devices = 0;
cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ];
clContextWrapper c = NULL;
clProgramWrapper program = NULL;
cl_uint num_devices = 0;
cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ];
// error = create_cl_objects(device, &set_kernel_exec_info_svm_ptrs_kernel[0],
// &context, &program, &q, &num_devices, CL_DEVICE_SVM_FINE_GRAIN);
error = create_cl_objects(device, &set_kernel_exec_info_svm_ptrs_kernel[0],
&c, &program, &queues[0], &num_devices,
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
if(error < 0) return -1; // fail test.
// error = create_cl_objects(device,
// &set_kernel_exec_info_svm_ptrs_kernel[0], &context, &program, &q,
// &num_devices, CL_DEVICE_SVM_FINE_GRAIN);
error = create_cl_objects(device, &set_kernel_exec_info_svm_ptrs_kernel[0],
&c, &program, &queues[0], &num_devices,
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
if (error == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (error < 0) return -1; // fail test.
clKernelWrapper k = clCreateKernel(program, "set_kernel_exec_info_test", &error);
test_error(error, "clCreateKernel failed");
clKernelWrapper k =
clCreateKernel(program, "set_kernel_exec_info_test", &error);
test_error(error, "clCreateKernel failed");
size_t size = num_elements*sizeof(int);
//int* pA = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
//int* pB = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
//int* pC = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
int* pA = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
int* pB = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
int* pC = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
BufPtrs* pBuf = (BufPtrs*) clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0);
size_t size = num_elements * sizeof(int);
// int* pA = (int*) clSVMalloc(c, CL_MEM_READ_WRITE |
// CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0); int* pB =
// (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM,
// sizeof(int)*num_elements, 0); int* pC = (int*) clSVMalloc(c,
// CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM,
// sizeof(int)*num_elements, 0);
int* pA = (int*)clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
int* pB = (int*)clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
int* pC = (int*)clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
BufPtrs* pBuf =
(BufPtrs*)clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0);
bool failed = false;
{
clMemWrapper ba,bb,bc,bBuf;
ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error);
test_error(error, "clCreateBuffer failed");
bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error);
test_error(error, "clCreateBuffer failed");
bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error);
test_error(error, "clCreateBuffer failed");
bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf, &error);
test_error(error, "clCreateBuffer failed");
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(BufPtrs), 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
for(int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0;
pBuf->pA = pA;
pBuf->pB = pB;
pBuf->pC = pC;
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bBuf, pBuf, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clSetKernelArgSVMPointer(k, 0, pBuf);
test_error(error, "clSetKernelArg failed");
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(BufPtrs), pBuf);
test_error(error, "clSetKernelExecInfo failed");
size_t range = num_elements;
error = clEnqueueNDRangeKernel(queues[0], k, 1, NULL, &range, NULL, 0, NULL, NULL);
test_error(error,"clEnqueueNDRangeKernel failed");
error = clFinish(queues[0]);
test_error(error, "clFinish failed.");
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
for(int i = 0; i < num_elements; i++)
bool failed = false;
{
if(pA[i] + pB[i] + pC[i] != 3)
failed = true;
clMemWrapper ba, bb, bc, bBuf;
ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error);
test_error(error, "clCreateBuffer failed");
bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error);
test_error(error, "clCreateBuffer failed");
bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error);
test_error(error, "clCreateBuffer failed");
bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf,
&error);
test_error(error, "clCreateBuffer failed");
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, sizeof(BufPtrs), 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
for (int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0;
pBuf->pA = pA;
pBuf->pB = pB;
pBuf->pC = pC;
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bBuf, pBuf, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clSetKernelArgSVMPointer(k, 0, pBuf);
test_error(error, "clSetKernelArg failed");
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS,
sizeof(BufPtrs), pBuf);
test_error(error, "clSetKernelExecInfo failed");
size_t range = num_elements;
error = clEnqueueNDRangeKernel(queues[0], k, 1, NULL, &range, NULL, 0,
NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clFinish(queues[0]);
test_error(error, "clFinish failed.");
// Special case testing of unsetting previously set SVM pointers
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, 0, NULL);
test_error(error,
"Unsetting previously set SVM pointers using "
"clSetKernelExecInfo failed");
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
for (int i = 0; i < num_elements; i++)
{
if (pA[i] + pB[i] + pC[i] != 3) failed = true;
}
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
}
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
}
error = clFinish(queues[0]);
test_error(error, " clFinish failed.");
error = clFinish(queues[0]);
test_error(error, " clFinish failed.");
clSVMFree(c, pA);
clSVMFree(c, pB);
clSVMFree(c, pC);
clSVMFree(c, pBuf);
clSVMFree(c, pA);
clSVMFree(c, pB);
clSVMFree(c, pC);
clSVMFree(c, pBuf);
if (failed) return -1;
if(failed) return -1;
return 0;
return 0;
}

View File

@@ -16,92 +16,184 @@
#include "testBase.h"
#include "harness/typeWrappers.h"
#include <vector>
REGISTER_TEST(negative_create_command_queue)
{
cl_command_queue_properties device_props = 0;
cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(device_props), &device_props, NULL);
test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
cl_int err = 0;
clCreateCommandQueue(nullptr, device, 0, &err);
test_failure_error_ret(
err, CL_INVALID_CONTEXT,
"clCreateCommandQueue should return CL_INVALID_CONTEXT when: \"context "
"is not a valid context\" using a nullptr",
TEST_FAIL);
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional property to
// clCreateCommandQueue, CL_QUEUE_PROFILING_ENABLE is mandatory.
const bool out_of_order_device_support =
device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
if (out_of_order_device_support)
clCreateCommandQueue(context, nullptr, 0, &err);
test_failure_error_ret(
err, CL_INVALID_DEVICE,
"clCreateCommandQueue should return CL_INVALID_DEVICE when: \"device "
"is not a valid device\" using a nullptr",
TEST_FAIL);
cl_device_id different_device = GetOpposingDevice(device);
if (different_device && device != different_device)
{
// Early return as we can't check correct error is returned for
// unsupported property.
return TEST_PASS;
clCreateCommandQueue(context, different_device, 0, &err);
test_failure_error_ret(
err, CL_INVALID_DEVICE,
"clCreateCommandQueue should return CL_INVALID_DEVICE when: "
"\"device is not associated with context\"",
TEST_FAIL);
}
// Try create a command queue with out-of-order property and check return
// code
cl_int test_error = CL_SUCCESS;
clCommandQueueWrapper test_queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &test_error);
cl_queue_properties invalid_property{ static_cast<cl_queue_properties>(
-1) };
clCreateCommandQueue(context, device, invalid_property, &err);
test_failure_error_ret(
test_error, CL_INVALID_QUEUE_PROPERTIES,
"clCreateCommandQueue should return CL_INVALID_QUEUE_PROPERTIES if "
"values specified in properties are valid but are not supported by "
"the "
"device.",
err, CL_INVALID_VALUE,
"clCreateCommandQueue should return CL_INVALID_VALUE when: \"values "
"specified in properties are not valid\"",
TEST_FAIL);
cl_command_queue_properties device_queue_properties = 0;
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(device_queue_properties),
&device_queue_properties, nullptr);
test_error(err, "clGetDeviceInfo");
cl_command_queue_properties valid_properties[] = {
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE
};
cl_queue_properties property{ 0 };
bool missing_property = false;
// Iterate through all possible properties to find one which isn't supported
for (auto prop : valid_properties)
{
if ((device_queue_properties & prop) == 0)
{
missing_property = true;
property = prop;
break;
}
}
// This test can only run when a device does not support a property
if (missing_property)
{
clCreateCommandQueue(context, device, property, &err);
test_failure_error_ret(
err, CL_INVALID_QUEUE_PROPERTIES,
"clCreateCommandQueue should return CL_INVALID_QUEUE_PROPERTIES "
"when: \"values specified in properties are valid but are not "
"supported by the device\"",
TEST_FAIL);
}
return TEST_PASS;
}
REGISTER_TEST_VERSION(negative_create_command_queue_with_properties,
Version(2, 0))
{
cl_command_queue_properties device_props = 0;
cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(device_props), &device_props, NULL);
test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
cl_int err = 0;
clCreateCommandQueueWithProperties(nullptr, device, nullptr, &err);
test_failure_error_ret(
err, CL_INVALID_CONTEXT,
"clCreateCommandQueueWithProperties should return CL_INVALID_CONTEXT "
"when: \"context is not a valid context\" using a nullptr",
TEST_FAIL);
cl_command_queue_properties device_on_host_props = 0;
error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
sizeof(device_on_host_props), &device_on_host_props,
NULL);
test_error(error,
"clGetDeviceInfo for CL_DEVICE_QUEUE_ON_HOST_PROPERTIES failed");
clCreateCommandQueueWithProperties(context, nullptr, nullptr, &err);
test_failure_error_ret(
err, CL_INVALID_DEVICE,
"clCreateCommandQueueWithProperties should return CL_INVALID_DEVICE "
"when: \"device is not a valid device\" using a nullptr",
TEST_FAIL);
if (device_on_host_props != device_props)
cl_device_id different_device = GetOpposingDevice(device);
if (different_device && device != different_device)
{
log_error(
"ERROR: CL_DEVICE_QUEUE_PROPERTIES and "
"CL_DEVICE_QUEUE_ON_HOST_PROPERTIES properties should match\n");
return TEST_FAIL;
clCreateCommandQueueWithProperties(context, different_device, nullptr,
&err);
test_failure_error_ret(
err, CL_INVALID_DEVICE,
"clCreateCommandQueueWithProperties should return "
"CL_INVALID_DEVICE when: \"device is not associated with context\"",
TEST_FAIL);
}
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional host-queue
// property to clCreateCommandQueueWithProperties,
// CL_QUEUE_PROFILING_ENABLE is mandatory.
const bool out_of_order_device_support =
device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
if (out_of_order_device_support)
{
// Early return as we can't check correct error is returned for
// unsupported property.
return TEST_PASS;
}
cl_queue_properties invalid_property{ static_cast<cl_queue_properties>(
-1) };
// Try create a command queue with out-of-order property and check return
// code
cl_command_queue_properties queue_prop_def[] = {
CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
// Depending on the OpenCL Version, there can be up to 2 properties which
// each take values, and the list should be terminated with a 0
cl_queue_properties properties[] = { invalid_property, invalid_property, 0,
0, 0 };
clCreateCommandQueueWithProperties(context, device, properties, &err);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clCreateCommandQueueWithProperties should return CL_INVALID_VALUE "
"when: \"values specified in properties are not valid\"",
TEST_FAIL);
cl_command_queue_properties device_queue_properties = 0;
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(device_queue_properties),
&device_queue_properties, nullptr);
test_error(err, "clGetDeviceInfo");
cl_command_queue_properties valid_properties[] = {
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE
};
cl_int test_error = CL_SUCCESS;
clCommandQueueWrapper test_queue = clCreateCommandQueueWithProperties(
context, device, queue_prop_def, &test_error);
test_failure_error_ret(test_error, CL_INVALID_QUEUE_PROPERTIES,
"clCreateCommandQueueWithProperties should "
"return CL_INVALID_QUEUE_PROPERTIES if "
"values specified in properties are valid but "
"are not supported by the "
"device.",
TEST_FAIL);
properties[0] = CL_QUEUE_PROPERTIES;
bool missing_property = false;
// Iterate through all possible properties to find one which isn't supported
for (auto property : valid_properties)
{
if ((device_queue_properties & property) == 0)
{
missing_property = true;
properties[1] = property;
break;
}
}
if (missing_property)
{
clCreateCommandQueueWithProperties(context, device, properties, &err);
test_failure_error_ret(
err, CL_INVALID_QUEUE_PROPERTIES,
"clCreateCommandQueueWithProperties should return "
"CL_INVALID_QUEUE_PROPERTIES when: \"values specified in "
"properties are valid but are not supported by the device\"",
TEST_FAIL);
}
else if (get_device_cl_version(device) >= Version(2, 0))
{
cl_uint max_size = -1;
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
sizeof(max_size), &max_size, nullptr);
test_error(err, "clGetDeviceInfo");
if (max_size > 0 && max_size < CL_UINT_MAX)
{
properties[0] = CL_QUEUE_PROPERTIES;
properties[1] =
CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
properties[2] = CL_QUEUE_SIZE;
properties[3] = max_size + 1;
clCreateCommandQueueWithProperties(context, device, properties,
&err);
if (err != CL_INVALID_VALUE && err != CL_INVALID_QUEUE_PROPERTIES)
{
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
"clCreateCommandQueueWithProperties should return "
"CL_INVALID_VALUE or CL_INVALID_QUEUE_PROPERTIES "
"when: \"values specified in properties are not "
"valid\" using a queue size greather than "
"CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE",
IGetErrorString(err),
"CL_INVALID_VALUE or CL_INVALID_QUEUE_PROPERTIES",
__FILE__, __LINE__);
return TEST_FAIL;
}
}
}
return TEST_PASS;
}
@@ -166,3 +258,236 @@ REGISTER_TEST(negative_create_command_queue_with_properties_khr)
TEST_FAIL);
return TEST_PASS;
}
// Negative tests for clSetDefaultDeviceCommandQueue. Each call passes one
// invalid argument and checks that the returned error code is one of the
// values permitted by the specification.
REGISTER_TEST_VERSION(negative_set_default_device_command_queue, Version(2, 1))
{
    cl_int err = 0;
    if (get_device_cl_version(device) >= Version(3, 0))
    {
        // An OpenCL 3.0 device may support on-device queues without
        // supporting a replaceable default queue; in that case setting a
        // default queue must fail with CL_INVALID_OPERATION.
        cl_device_device_enqueue_capabilities device_capabilities = 0;
        cl_int err = clGetDeviceInfo(
            device, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
            sizeof(device_capabilities), &device_capabilities, nullptr);
        test_error(err, "clGetDeviceInfo");
        if (((device_capabilities & CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT) == 0)
            && ((device_capabilities & CL_DEVICE_QUEUE_SUPPORTED) == 1))
        {
            const cl_queue_properties properties[] = {
                CL_QUEUE_PROPERTIES,
                CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_ON_DEVICE
                    | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
                0
            };
            clCommandQueueWrapper cmd_queue =
                clCreateCommandQueueWithProperties(context, device, properties,
                                                   &err);
            test_error(err, "clCreateCommandQueueWithProperties");
            err = clSetDefaultDeviceCommandQueue(context, device, cmd_queue);
            test_failure_error_ret(
                err, CL_INVALID_OPERATION,
                "clSetDefaultDeviceCommandQueue should return "
                "CL_INVALID_OPERATION when \"device does not support a "
                "replaceable default on-device queue\"",
                TEST_FAIL);
        }
    }
    // "context is not a valid context": nullptr handle.
    err = clSetDefaultDeviceCommandQueue(nullptr, device, queue);
    if (err != CL_INVALID_OPERATION && err != CL_INVALID_CONTEXT)
    {
        log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
                  "clSetDefaultDeviceCommandQueue should return "
                  "CL_INVALID_OPERATION or CL_INVALID_CONTEXT when: \"context "
                  "is not a valid context\" using a nullptr",
                  IGetErrorString(err),
                  "CL_INVALID_OPERATION or CL_INVALID_CONTEXT", __FILE__,
                  __LINE__);
        return TEST_FAIL;
    }
    // "device is not a valid device": nullptr handle.
    err = clSetDefaultDeviceCommandQueue(context, nullptr, queue);
    if (err != CL_INVALID_OPERATION && err != CL_INVALID_DEVICE)
    {
        log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
                  "clSetDefaultDeviceCommandQueue should return "
                  "CL_INVALID_OPERATION or CL_INVALID_DEVICE when: \"device "
                  "is not a valid device\" using a nullptr",
                  IGetErrorString(err),
                  "CL_INVALID_OPERATION or CL_INVALID_DEVICE", __FILE__,
                  __LINE__);
        return TEST_FAIL;
    }
    // "device is not associated with context": a device from another
    // platform/context, when one is available.
    cl_device_id different_device = GetOpposingDevice(device);
    if (different_device && device != different_device)
    {
        err = clSetDefaultDeviceCommandQueue(context, different_device, queue);
        if (err != CL_INVALID_OPERATION && err != CL_INVALID_DEVICE)
        {
            log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
                      "clSetDefaultDeviceCommandQueue should return "
                      "CL_INVALID_OPERATION or CL_INVALID_DEVICE when: "
                      "\"device is not associated with context\"",
                      IGetErrorString(err),
                      "CL_INVALID_OPERATION or CL_INVALID_DEVICE", __FILE__,
                      __LINE__);
            return TEST_FAIL;
        }
    }
    // "command_queue is not a valid command-queue": nullptr handle.
    err = clSetDefaultDeviceCommandQueue(context, device, nullptr);
    if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
    {
        log_error(
            "ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
            "clSetDefaultDeviceCommandQueue should return CL_INVALID_OPERATION "
            "or CL_INVALID_COMMAND_QUEUE when: \"command_queue is not a valid "
            "command-queue for device\" using a nullptr",
            IGetErrorString(err),
            "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE", __FILE__,
            __LINE__);
        return TEST_FAIL;
    }
    // A host command queue is not a valid default on-device queue.
    {
        constexpr cl_queue_properties props[] = {
            CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
        };
        clCommandQueueWrapper not_on_device_queue =
            clCreateCommandQueueWithProperties(context, device, props, &err);
        test_error_fail(err, "clCreateCommandQueueWithProperties failed");
        err = clSetDefaultDeviceCommandQueue(context, device,
                                             not_on_device_queue);
        if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
        {
            log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
                      "clSetDefaultDeviceCommandQueue should return "
                      "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: "
                      "\"command_queue is not a valid command-queue for "
                      "device\" using a command queue that is not on device",
                      IGetErrorString(err),
                      "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE",
                      __FILE__, __LINE__);
            // Report the failure to the harness; without this return the
            // test would log the error but still report TEST_PASS.
            return TEST_FAIL;
        }
    }
    return TEST_PASS;
}
// Verify clRetainCommandQueue rejects an invalid (null) queue handle.
REGISTER_TEST(negative_retain_command_queue)
{
    // A null handle must be reported as CL_INVALID_COMMAND_QUEUE.
    const cl_int status = clRetainCommandQueue(nullptr);
    test_failure_error_ret(
        status, CL_INVALID_COMMAND_QUEUE,
        "clRetainCommandQueue should return CL_INVALID_COMMAND_QUEUE when: "
        "\"command_queue is not a valid command-queue\" using a nullptr",
        TEST_FAIL);
    return TEST_PASS;
}
// Verify clReleaseCommandQueue rejects an invalid (null) queue handle.
REGISTER_TEST(negative_release_command_queue)
{
    // A null handle must be reported as CL_INVALID_COMMAND_QUEUE.
    const cl_int status = clReleaseCommandQueue(nullptr);
    test_failure_error_ret(
        status, CL_INVALID_COMMAND_QUEUE,
        "clReleaseCommandQueue should return CL_INVALID_COMMAND_QUEUE when: "
        "\"command_queue is not a valid command-queue\" using a nullptr",
        TEST_FAIL);
    return TEST_PASS;
}
// Returns true when the device exposes an out-of-order on-device queue.
// On-device queues only exist from OpenCL 2.0 onwards, so older devices
// always report false.
static bool device_supports_on_device_queue(cl_device_id deviceID)
{
    if (get_device_cl_version(deviceID) < Version(2, 0)) return false;

    cl_command_queue_properties on_device_props = 0;
    cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
                                 sizeof(on_device_props), &on_device_props,
                                 nullptr);
    test_error(err, "clGetDeviceInfo");
    return (on_device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
}
// Negative tests for clGetCommandQueueInfo: an invalid queue handle, a
// device-queue-only query issued on a host queue, an unsupported
// param_name, and a too-small param_value_size.
REGISTER_TEST(negative_get_command_queue_info)
{
    // "command_queue is not a valid command-queue": nullptr handle.
    cl_int err =
        clGetCommandQueueInfo(nullptr, CL_QUEUE_CONTEXT, 0, nullptr, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_COMMAND_QUEUE,
        "clGetCommandQueueInfo should return CL_INVALID_COMMAND_QUEUE when: "
        "\"command_queue is not a valid command-queue\" using a nullptr",
        TEST_FAIL);
    if (device_supports_on_device_queue(device))
    {
        // CL_QUEUE_SIZE is only defined for on-device queues, so querying
        // it on a freshly-created host command queue must fail.
        const cl_queue_properties properties[] = {
            CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
        };
        cl_int err = CL_INVALID_VALUE;
        clCommandQueueWrapper cmd_queue = clCreateCommandQueueWithProperties(
            context, device, properties, &err);
        test_error(err, "clCreateCommandQueueWithProperties");
        cl_uint queue_size = -1;
        err = clGetCommandQueueInfo(cmd_queue, CL_QUEUE_SIZE,
                                    sizeof(queue_size), &queue_size, nullptr);
        test_failure_error_ret(err, CL_INVALID_COMMAND_QUEUE,
                               "clGetCommandQueueInfo should return "
                               "CL_INVALID_COMMAND_QUEUE when: \"command_queue "
                               "is not a valid command-queue for param_name\"",
                               TEST_FAIL);
    }
    // "param_name is not one of the supported values": all-bits-set value.
    constexpr cl_command_queue_info invalid_param = -1;
    err = clGetCommandQueueInfo(queue, invalid_param, 0, nullptr, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clGetCommandQueueInfo should return CL_INVALID_VALUE when: "
        "\"param_name is not one of the supported values\"",
        TEST_FAIL);
    // param_value_size of 0 with a non-NULL param_value is smaller than the
    // cl_uint return type and must be rejected.
    cl_uint ref_count = -1;
    err = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, 0, &ref_count,
                                nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clGetCommandQueueInfo should return CL_INVALID_VALUE when: \"size in "
        "bytes specified by param_value_size is < size of return type and "
        "param_value is not a NULL value\"",
        TEST_FAIL);
    return TEST_PASS;
}
// Negative tests for clSetCommandQueueProperty, an API that is only
// required to work on OpenCL 1.0 devices.
REGISTER_TEST_VERSION(negative_set_command_queue_property, Version(1, 0))
{
    auto version = get_device_cl_version(device);
    if (version >= Version(1, 1))
    {
        // Implementations are allowed to return an error for
        // non-OpenCL 1.0 devices. In which case, skip the test.
        return TEST_SKIPPED_ITSELF;
    }
    // "command_queue is not a valid command-queue": nullptr handle.
    cl_queue_properties property{ CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE };
    cl_int err = clSetCommandQueueProperty(nullptr, property, CL_TRUE, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_COMMAND_QUEUE,
        "clSetCommandQueueProperty should return CL_INVALID_COMMAND_QUEUE "
        "when: \"command_queue is not a valid command-queue\" using a nullptr",
        TEST_FAIL);
    // An all-bits-set value is not a valid property bitfield.
    property = -1;
    err = clSetCommandQueueProperty(queue, property, CL_TRUE, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clSetCommandQueueProperty should return CL_INVALID_VALUE when: "
        "\"values specified in properties are not valid\"",
        TEST_FAIL);
    return TEST_PASS;
}

View File

@@ -336,3 +336,161 @@ REGISTER_TEST(kernel_attributes)
}
return success ? TEST_PASS : TEST_FAIL;
}
REGISTER_TEST(null_required_work_group_size)
{
cl_int error = CL_SUCCESS;
clGetKernelSuggestedLocalWorkSizeKHR_fn
clGetKernelSuggestedLocalWorkSizeKHR = nullptr;
if (is_extension_available(device, "cl_khr_suggested_local_work_size"))
{
cl_platform_id platform = nullptr;
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
&platform, NULL);
test_error(error, "clGetDeviceInfo for platform failed");
clGetKernelSuggestedLocalWorkSizeKHR =
(clGetKernelSuggestedLocalWorkSizeKHR_fn)
clGetExtensionFunctionAddressForPlatform(
platform, "clGetKernelSuggestedLocalWorkSizeKHR");
test_assert_error(clGetKernelSuggestedLocalWorkSizeKHR != nullptr,
"Couldn't get function pointer for "
"clGetKernelSuggestedLocalWorkSizeKHR");
}
cl_uint device_max_dim = 0;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
sizeof(device_max_dim), &device_max_dim, nullptr);
test_error(error,
"clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
test_assert_error(device_max_dim >= 3,
"CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS must be at least 3!");
std::vector<size_t> device_max_work_item_sizes(device_max_dim);
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(size_t) * device_max_dim,
device_max_work_item_sizes.data(), nullptr);
size_t device_max_work_group_size = 0;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(device_max_work_group_size),
&device_max_work_group_size, nullptr);
test_error(error,
"clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed");
clMemWrapper dst;
dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
nullptr, &error);
struct KernelAttribInfo
{
std::string str;
cl_uint max_dim;
};
std::vector<KernelAttribInfo> attribs;
attribs.push_back({ "__attribute__((reqd_work_group_size(2,1,1)))", 1 });
attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,1)))", 2 });
attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,4)))", 3 });
const std::string body_str = R"(
__kernel void wg_size(__global int* dst)
{
if (get_global_id(0) == 0 &&
get_global_id(1) == 0 &&
get_global_id(2) == 0) {
dst[0] = get_local_size(0);
dst[1] = get_local_size(1);
dst[2] = get_local_size(2);
}
}
)";
for (auto& attrib : attribs)
{
const std::string source_str = attrib.str + body_str;
const char* source = source_str.c_str();
clProgramWrapper program;
clKernelWrapper kernel;
error = create_single_kernel_helper(context, &program, &kernel, 1,
&source, "wg_size");
test_error(error, "Unable to create test kernel");
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
test_error(error, "clSetKernelArg failed");
for (cl_uint work_dim = 1; work_dim <= attrib.max_dim; work_dim++)
{
const cl_int expected[3] = { 2, work_dim >= 2 ? 3 : 1,
work_dim >= 3 ? 4 : 1 };
const size_t test_work_group_size =
expected[0] * expected[1] * expected[2];
if ((size_t)expected[0] > device_max_work_item_sizes[0]
|| (size_t)expected[1] > device_max_work_item_sizes[1]
|| (size_t)expected[2] > device_max_work_item_sizes[2]
|| test_work_group_size > device_max_work_group_size)
{
log_info("Skipping test for work_dim = %u: required work group "
"size (%i, %i, %i) (total %zu) exceeds device max "
"work group size (%zu, %zu, %zu) (total %zu)\n",
work_dim, expected[0], expected[1], expected[2],
test_work_group_size, device_max_work_item_sizes[0],
device_max_work_item_sizes[1],
device_max_work_item_sizes[2],
device_max_work_group_size);
continue;
}
const cl_int zero = 0;
error = clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
sizeof(expected), 0, nullptr, nullptr);
const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr,
global_work_size, nullptr, 0,
nullptr, nullptr);
test_error(error, "clEnqueueNDRangeKernel failed");
cl_int results[3] = { -1, -1, -1 };
error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(results),
results, 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
if (results[0] != expected[0] || results[1] != expected[1]
|| results[2] != expected[2])
{
log_error("Executed local size mismatch with work_dim = %u: "
"Expected (%d,%d,%d) got (%d,%d,%d)\n",
work_dim, expected[0], expected[1], expected[2],
results[0], results[1], results[2]);
return TEST_FAIL;
}
if (clGetKernelSuggestedLocalWorkSizeKHR != nullptr)
{
size_t suggested[3] = { 1, 1, 1 };
error = clGetKernelSuggestedLocalWorkSizeKHR(
queue, kernel, work_dim, nullptr, global_work_size,
suggested);
test_error(error,
"clGetKernelSuggestedLocalWorkSizeKHR failed");
if ((cl_int)suggested[0] != expected[0]
|| (cl_int)suggested[1] != expected[1]
|| (cl_int)suggested[2] != expected[2])
{
log_error("Suggested local size mismatch with work_dim = "
"%u: Expected (%d,%d,%d) got (%d,%d,%d)\n",
work_dim, expected[0], expected[1], expected[2],
(cl_int)suggested[0], (cl_int)suggested[1],
(cl_int)suggested[2]);
return TEST_FAIL;
}
}
}
}
return TEST_PASS;
}

View File

@@ -16,6 +16,8 @@
#include "testBase.h"
#include "harness/typeWrappers.h"
#include "harness/conversions.h"
#include "harness/stringHelpers.h"
#include <array>
#include <vector>
const char *sample_single_test_kernel[] = {
@@ -87,6 +89,16 @@ const char *sample_two_kernel_program[] = {
"\n"
"}\n" };
const char *sample_sampler_size_test_kernel = R"(
__kernel void sampler_size_test(sampler_t sampler, __read_only image2d_t src, __global float4 *dst)
{
int tid = get_global_id(0);
int2 coord = (int2)(get_global_id(0), get_global_id(1));
float4 data = read_imagef(src, sampler, coord);
dst[tid] = data;
}
)";
const char *sample_mem_obj_size_test_kernel = R"(
__kernel void mem_obj_size_test(__global int *src, __global int *dst)
{
@@ -117,6 +129,14 @@ const char *sample_write_only_image_test_kernel = R"(
}
)";
const char *sample_arg_size_test_kernel = R"(
%s
__kernel void arg_size_test(%s src, __global %s *dst)
{
dst[0]=src;
}
)";
REGISTER_TEST(get_kernel_info)
{
int error;
@@ -734,6 +754,148 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_sampler)
{
    PASSIVE_REQUIRE_IMAGE_SUPPORT(device)

    cl_int err = CL_SUCCESS;
    clProgramWrapper program;
    clKernelWrapper test_kernel;

    // Build a program whose kernel declares a sampler_t as argument 0.
    err = create_single_kernel_helper(context, &program, nullptr, 1,
                                      &sample_sampler_size_test_kernel,
                                      nullptr);
    test_error(err, "Unable to build test program");

    test_kernel = clCreateKernel(program, "sampler_size_test", &err);
    test_error(err,
               "Unable to get sampler_size_test kernel for built program");

    // A null arg_value for a sampler_t argument must be rejected with
    // CL_INVALID_SAMPLER.
    err = clSetKernelArg(test_kernel, 0, sizeof(cl_sampler), nullptr);
    test_failure_error_ret(
        err, CL_INVALID_SAMPLER,
        "clSetKernelArg is supposed to fail with CL_INVALID_SAMPLER when "
        "argument is declared to be of type sampler_t and the specified "
        "arg_value is not a valid sampler object",
        TEST_FAIL);

    return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_sampler_size)
{
    PASSIVE_REQUIRE_IMAGE_SUPPORT(device)

    cl_int err = CL_SUCCESS;
    clProgramWrapper program;
    clKernelWrapper test_kernel;

    // Build a program whose kernel declares a sampler_t as argument 0.
    err = create_single_kernel_helper(context, &program, nullptr, 1,
                                      &sample_sampler_size_test_kernel,
                                      nullptr);
    test_error(err, "Unable to build test program");

    test_kernel = clCreateKernel(program, "sampler_size_test", &err);
    test_error(err,
               "Unable to get sampler_size_test kernel for built program");

    clSamplerWrapper sampler = clCreateSampler(
        context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &err);
    test_error(err, "Unable to create sampler");

    // An arg_size larger than sizeof(cl_sampler) must be rejected.
    err = clSetKernelArg(test_kernel, 0, sizeof(cl_sampler) * 2, &sampler);
    test_failure_error_ret(
        err, CL_INVALID_ARG_SIZE,
        "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
        "argument is a sampler object and arg_size > sizeof(cl_sampler)",
        TEST_FAIL);

    // ...and so must an arg_size smaller than sizeof(cl_sampler).
    err = clSetKernelArg(test_kernel, 0, sizeof(cl_sampler) / 2, &sampler);
    test_failure_error_ret(
        err, CL_INVALID_ARG_SIZE,
        "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
        "argument is a sampler object and arg_size < sizeof(cl_sampler)",
        TEST_FAIL);

    return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_size)
{
    // Element types to exercise; unsupported ones are skipped below.
    const std::vector<ExplicitType> exp_types = {
        kChar, kUChar, kShort, kUShort, kInt,   kUInt,
        kLong, kULong, kFloat, kHalf,   kDouble
    };

    const bool fp16_supported = is_extension_available(device, "cl_khr_fp16");
    const bool fp64_supported = is_extension_available(device, "cl_khr_fp64");

    for (const auto type : exp_types)
    {
        // Skip types the device does not support, and multi-word type
        // names which cannot be spliced into the kernel source.
        if ((type == kLong || type == kULong) && !gHasLong) continue;
        if (type == kDouble && !fp64_supported) continue;
        if (type == kHalf && !fp16_supported) continue;
        if (strchr(get_explicit_type_name(type), ' ') != 0) continue;

        const std::array<unsigned int, 5> vec_widths = { 1, 2, 4, 8, 16 };
        // Scratch argument storage large enough for the widest type.
        std::vector<char> scratch(sizeof(cl_ulong16), 0);

        for (const unsigned int width : vec_widths)
        {
            clProgramWrapper program;
            clKernelWrapper kernel;

            const size_t arg_bytes = get_explicit_type_size(type) * width;

            // Compose the (possibly vectorized) type name, e.g. "float4".
            std::ostringstream type_name;
            type_name << get_explicit_type_name(type);
            if (width != 1) type_name << width;

            // fp16/fp64 kernels need the matching extension pragma.
            std::string pragma_str;
            if (type == kDouble)
                pragma_str =
                    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
            if (type == kHalf)
                pragma_str =
                    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";

            const auto tn = type_name.str();
            std::string program_source =
                str_sprintf(std::string(sample_arg_size_test_kernel),
                            pragma_str.c_str(), tn.c_str(), tn.c_str());
            const char *src = program_source.c_str();
            cl_int error = create_single_kernel_helper(
                context, &program, &kernel, 1, &src, "arg_size_test");
            test_error(error, "Unable to build test program!");

            // A too-small arg_size must produce CL_INVALID_ARG_SIZE.
            const size_t reduced = arg_bytes / 2;
            error = clSetKernelArg(kernel, 0, reduced, scratch.data());
            if (error != CL_INVALID_ARG_SIZE)
            {
                std::stringstream sstr;
                sstr << "clSetKernelArg is supposed to fail "
                        "with CL_INVALID_ARG_SIZE with type "
                     << type_name.str() << " and sizeof " << reduced
                     << std::endl;
                log_error("%s", sstr.str().c_str());
                return TEST_FAIL;
            }
        }
    }
    return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_mem_obj)
{
cl_int error = CL_SUCCESS;

View File

@@ -255,18 +255,12 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
}
// Initialize our storage
std::unique_ptr<cl_int[]> l_bin_counts(new cl_int[number_of_bins]);
if (!l_bin_counts)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
"bin_counters.\n");
return -1;
}
std::vector<cl_int> l_bin_counts(number_of_bins);
int i;
for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
sizeof(cl_int) * number_of_bins,
l_bin_counts.get(), 0, NULL, NULL);
l_bin_counts.data(), 0, NULL, NULL);
if (err)
{
log_error("add_index_bin_test FAILED to set initial values for "
@@ -275,19 +269,12 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
return -1;
}
std::unique_ptr<cl_int[]> values(
new cl_int[number_of_bins * max_counts_per_bin]);
if (!values)
{
log_error(
"add_index_bin_test FAILED to allocate initial values for bins.\n");
return -1;
}
std::vector<cl_int> values(number_of_bins * max_counts_per_bin);
for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
err = clEnqueueWriteBuffer(queue, bins, true, 0,
sizeof(cl_int) * number_of_bins
* max_counts_per_bin,
values.get(), 0, NULL, NULL);
values.data(), 0, NULL, NULL);
if (err)
{
log_error(
@@ -296,13 +283,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
return -1;
}
std::unique_ptr<cl_int[]> l_bin_assignments(new cl_int[number_of_items]);
if (!l_bin_assignments)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
"l_bin_assignments.\n");
return -1;
}
std::vector<cl_int> l_bin_assignments(number_of_items);
for (i = 0; i < number_of_items; i++)
{
int bin = random_in_range(0, number_of_bins - 1, d);
@@ -326,7 +307,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
}
err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
sizeof(cl_int) * number_of_items,
l_bin_assignments.get(), 0, NULL, NULL);
l_bin_assignments.data(), 0, NULL, NULL);
if (err)
{
log_error("add_index_bin_test FAILED to set initial values for "
@@ -355,34 +336,22 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
return -1;
}
std::unique_ptr<cl_int[]> final_bin_assignments(
new cl_int[number_of_bins * max_counts_per_bin]);
if (!final_bin_assignments)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
"final_bin_assignments.\n");
return -1;
}
std::vector<cl_int> final_bin_assignments(number_of_bins
* max_counts_per_bin);
err = clEnqueueReadBuffer(queue, bins, true, 0,
sizeof(cl_int) * number_of_bins
* max_counts_per_bin,
final_bin_assignments.get(), 0, NULL, NULL);
final_bin_assignments.data(), 0, NULL, NULL);
if (err)
{
log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
return -1;
}
std::unique_ptr<cl_int[]> final_bin_counts(new cl_int[number_of_bins]);
if (!final_bin_counts)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
"final_bin_counts.\n");
return -1;
}
std::vector<cl_int> final_bin_counts(number_of_bins);
err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
sizeof(cl_int) * number_of_bins,
final_bin_counts.get(), 0, NULL, NULL);
final_bin_counts.data(), 0, NULL, NULL);
if (err)
{
log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",

View File

@@ -193,15 +193,38 @@ int AtomicTypeInfo::IsSupported(cl_device_id device)
template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;}
template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() {return 0;}
template<> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue() {return CL_FLT_MIN;}
template<> cl_double AtomicTypeExtendedInfo<cl_double>::MinValue() {return CL_DBL_MIN;}
template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; }
template <> cl_half AtomicTypeExtendedInfo<cl_half>::MinValue()
{
return cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode);
}
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
{
return -CL_FLT_MAX;
}
template <> cl_double AtomicTypeExtendedInfo<cl_double>::MinValue()
{
return -CL_DBL_MAX;
}
template<> cl_int AtomicTypeExtendedInfo<cl_int>::MaxValue() {return CL_INT_MAX;}
template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue() {return CL_UINT_MAX;}
template <> cl_int AtomicTypeExtendedInfo<cl_int>::MaxValue()
{
return CL_INT_MAX;
}
template <> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue()
{
return CL_UINT_MAX;
}
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
template<> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue() {return CL_FLT_MAX;}
template <> cl_half AtomicTypeExtendedInfo<cl_half>::MaxValue()
{
return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode);
}
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
{
return CL_FLT_MAX;
}
template<> cl_double AtomicTypeExtendedInfo<cl_double>::MaxValue() {return CL_DBL_MAX;}
cl_int getSupportedMemoryOrdersAndScopes(

View File

@@ -79,6 +79,7 @@ extern cl_device_atomic_capabilities gAtomicMemCap,
extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
extern cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps;
extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
extern const char *
@@ -893,15 +894,16 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
header += std::string("__global volatile ") + aTypeName + " destMemory["
+ ss.str() + "] = {\n";
ss.str("");
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
== TYPE_ATOMIC_FLOAT)
ss << std::setprecision(10) << _startValue;
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
== TYPE_ATOMIC_HALF)
ss << static_cast<HostDataType>(
cl_half_to_float(static_cast<cl_half>(_startValue)));
ss << cl_half_to_float(static_cast<cl_half>(_startValue));
else
ss << _startValue;
for (cl_uint i = 0; i < maxNumDestItems; i++)
{
if (aTypeName == "atomic_flag")

View File

@@ -18,7 +18,6 @@
#include "harness/testHarness.h"
#include <mutex>
#include "CL/cl_half.h"
#ifdef WIN32
@@ -99,7 +98,19 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)),
gHalfRoundingMode);
return old_value;
}
else if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
@@ -109,7 +120,7 @@ CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingTyp
}
else
{
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
return InterlockedExchangeAdd(a, c);
#elif defined(__GNUC__)
return __sync_fetch_and_add(a, c);
@@ -124,7 +135,15 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a -= c;
return old_value;
}
else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
@@ -173,14 +192,30 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
TExplicitMemoryOrderType order_failure)
{
CorrespondingType tmp;
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile cl_half *>(a);
if (cl_half_to_float(tmp) == cl_half_to_float(*expected))
{
*reinterpret_cast<volatile cl_half *>(a) = desired;
return true;
}
*expected = tmp;
}
else if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile float *>(a);
if (tmp == *expected)
{
*reinterpret_cast<volatile float *>(a) = desired;
*a = desired;
return true;
}
*expected = tmp;
@@ -188,7 +223,6 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
else
{
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
tmp = InterlockedCompareExchange(a, desired, *expected);
#elif defined(__GNUC__)
tmp = __sync_val_compare_and_swap(a, *expected, desired);

View File

@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
bool gFloatAtomicsSupported = false;
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
test_status InitCL(cl_device_id device) {
@@ -134,6 +135,14 @@ test_status InitCL(cl_device_id device) {
{
gFloatAtomicsSupported = true;
if (is_extension_available(device, "cl_khr_fp64"))
{
cl_int error = clGetDeviceInfo(
device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
}
cl_int error = clGetDeviceInfo(
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);

View File

@@ -417,7 +417,7 @@ public:
correct = true;
for (cl_uint i = 0; i < threadCount; i++)
{
if constexpr (std::is_same<HostDataType, cl_half>::value)
if constexpr (std::is_same_v<HostDataType, cl_half>)
{
HostDataType test = cl_half_from_float(static_cast<float>(i),
gHalfRoundingMode);
@@ -1163,13 +1163,30 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak)
num_elements, true);
}
template <typename T> double kahan_sum(const std::vector<T> &nums)
{
return 0.0;
}
template <> double kahan_sum<double>(const std::vector<double> &nums)
{
double sum = 0.0;
double compensation = 0.0;
for (double num : nums)
{
double y = num - compensation;
double t = sum + y;
compensation = (t - sum) - y;
sum = t;
}
return sum;
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
double min_range;
double max_range;
double max_error_fp32;
double max_error;
std::vector<HostDataType> ref_vals;
public:
@@ -1182,26 +1199,98 @@ public:
CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM),
min_range(-999.0), max_range(999.0), max_error_fp32(0.0)
min_range(-999.0), max_range(999.0), max_error(0.0)
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
StartValue(0.f);
StartValue((HostDataType)0.0);
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
}
}
template <typename Iterator> float accum_halfs(Iterator begin, Iterator end)
{
cl_half sum = 0;
for (auto it = begin; it != end; ++it)
{
sum = cl_half_from_float(cl_half_to_float(sum)
+ cl_half_to_float(*it),
gHalfRoundingMode);
}
return cl_half_to_float(sum);
}
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = get_random_float(min_range, max_range, d);
ref_vals[i] = cl_half_from_float(
get_random_float(min_range, max_range, d),
gHalfRoundingMode);
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size());
// Estimate highest possible summation error for given set.
std::vector<float> sums;
std::sort(ref_vals.begin(), ref_vals.end(),
[](cl_half a, cl_half b) {
return cl_half_to_float(a) < cl_half_to_float(b);
});
sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
std::sort(ref_vals.begin(), ref_vals.end(),
[](cl_half a, cl_half b) {
return std::abs(cl_half_to_float(a))
< std::abs(cl_half_to_float(b));
});
float precise = 0.f;
for (auto elem : ref_vals) precise += cl_half_to_float(elem);
sums.push_back(precise);
sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
std::sort(sums.begin(), sums.end());
max_error = std::abs(sums.front() - sums.back());
// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
sizeof(HostDataType) * ref_vals.size());
}
else
{
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
}
return true;
}
else if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
ref_vals[i] =
get_random_double(min_range, max_range, d);
else
ref_vals[i] = get_random_float(min_range, max_range, d);
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size());
@@ -1216,12 +1305,17 @@ public:
sums.push_back(
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
std::sort(
ref_vals.begin(), ref_vals.end(),
[](float a, float b) { return std::abs(a) < std::abs(b); });
std::sort(ref_vals.begin(), ref_vals.end(),
[](HostDataType a, HostDataType b) {
return std::abs(a) < std::abs(b);
});
double precise = 0.0;
for (auto elem : ref_vals) precise += double(elem);
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
precise = kahan_sum(ref_vals);
else
for (auto elem : ref_vals) precise += double(elem);
sums.push_back(precise);
sums.push_back(
@@ -1231,8 +1325,7 @@ public:
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
std::sort(sums.begin(), sums.end());
max_error_fp32 =
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
max_error = std::abs(sums.front() - sums.back());
// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
@@ -1252,7 +1345,10 @@ public:
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1286,7 +1382,10 @@ public:
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder());
@@ -1312,7 +1411,23 @@ public:
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (whichDestValue == 0)
{
for (cl_uint i = 0; i < threadCount; i++)
{
expected = cl_half_from_float(
cl_half_to_float(expected)
+ cl_half_to_float(startRefValues[i]),
gHalfRoundingMode);
}
}
}
else if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
@@ -1331,12 +1446,22 @@ public:
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override
{
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (whichDestValue == 0)
return std::abs((HOST_ATOMIC_FLOAT)expected
return std::abs(cl_half_to_float(expected)
- cl_half_to_float(testValues[whichDestValue]))
> max_error;
}
else if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
return std::abs((HostDataType)expected
- testValues[whichDestValue])
> max_error_fp32;
> max_error;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
@@ -1346,7 +1471,10 @@ public:
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override
{
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
correct = true;
for (cl_uint i = 1; i < threadCount; i++)
@@ -1369,7 +1497,28 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1385,7 +1534,10 @@ public:
}
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
return threadCount;
}
@@ -1420,6 +1572,16 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported)
{
CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
@@ -1506,7 +1668,10 @@ public:
useSVM),
min_range(-999.0), max_range(999.0), max_error(0.0)
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
{
StartValue(0);
CBasicTestMemOrderScope<HostAtomicType,
@@ -1514,6 +1679,13 @@ public:
}
}
template <typename Iterator>
HostDataType subtract(Iterator begin, Iterator end)
{
HostDataType res = 0;
for (auto it = begin; it != end; ++it) res = res - *it;
return res;
}
template <typename Iterator>
float subtract_halfs(Iterator begin, Iterator end)
{
cl_half res = 0;
@@ -1528,12 +1700,53 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = get_random_float(min_range, max_range, d);
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size());
// Estimate highest possible subtraction error for given set.
std::vector<HostDataType> sums;
std::sort(ref_vals.begin(), ref_vals.end());
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(
ref_vals.begin(), ref_vals.end(),
[](float a, float b) { return std::abs(a) < std::abs(b); });
double precise = 0.0;
for (auto elem : ref_vals) precise += double(elem);
sums.push_back(precise);
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(sums.begin(), sums.end());
max_error =
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
sizeof(HostDataType) * ref_vals.size());
}
else
{
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
}
return true;
}
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = cl_half_from_float(
get_random_float(min_range, max_range, d),
@@ -1563,7 +1776,6 @@ public:
float precise = 0.f;
for (auto elem : ref_vals) precise -= cl_half_to_float(elem);
sums.push_back(precise);
sums.push_back(
subtract_halfs(ref_vals.begin(), ref_vals.end()));
sums.push_back(
@@ -1571,7 +1783,6 @@ public:
std::sort(sums.begin(), sums.end());
max_error = std::abs(sums.front() - sums.back());
// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
sizeof(HostDataType) * ref_vals.size());
@@ -1590,7 +1801,10 @@ public:
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1612,7 +1826,10 @@ public:
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder());
@@ -1634,7 +1851,13 @@ public:
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
expected -= startRefValues[i];
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (whichDestValue == 0)
{
@@ -1659,7 +1882,14 @@ public:
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
return std::abs((HOST_ATOMIC_FLOAT)expected
- testValues[whichDestValue])
> max_error;
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (whichDestValue == 0)
return std::abs(cl_half_to_float(expected)
@@ -1674,7 +1904,7 @@ public:
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override
{
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
if (std::is_same_v<HostDataType, HOST_FLOAT>)
{
correct = true;
for (cl_uint i = 1; i < threadCount; i++)
@@ -1697,7 +1927,17 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1713,7 +1953,10 @@ public:
}
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
return threadCount;
}
@@ -1748,6 +1991,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported)
{
CBasicTestFetchSub<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
@@ -2624,7 +2872,10 @@ public:
min_range(-999.0), max_range(999.0)
{
StartValue(DataType().MaxValue());
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
@@ -2634,7 +2885,10 @@ public:
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
return " atomic_fetch_min" + postfix
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
@@ -2653,7 +2907,10 @@ public:
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
host_atomic_fetch_min(&destMemory[0], oldValues[tid],
MemoryOrder());
@@ -2669,7 +2926,19 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
for (cl_uint i = 0; i < threadCount; i++)
{
startRefValues[i] = cl_half_from_float(
get_random_float(min_range, max_range, d),
gHalfRoundingMode);
}
}
else if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
for (cl_uint i = 0; i < threadCount; i++)
{
@@ -2696,7 +2965,19 @@ public:
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (whichDestValue == 0)
{
for (cl_uint i = 0; i < threadCount; i++)
{
if (cl_half_to_float(startRefValues[i])
< cl_half_to_float(expected))
expected = startRefValues[i];
}
}
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
@@ -2716,7 +2997,9 @@ public:
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override
{
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
if (std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (whichDestValue == 0)
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
@@ -2731,12 +3014,14 @@ public:
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override
{
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
if (std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
{
correct = true;
for (cl_uint i = 1; i < threadCount; i++)
{
for (cl_uint i = 1; i < threadCount; i++)
if (refValues[i] != StartValue())
{
log_error("Thread %d found %d mismatch(es)\n", i,
(cl_uint)refValues[i]);
@@ -2754,7 +3039,31 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
@@ -2772,7 +3081,10 @@ public:
}
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
return threadCount;
}
@@ -2807,6 +3119,16 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported)
{
CBasicTestFetchMin<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
@@ -2890,18 +3212,31 @@ public:
useSVM),
min_range(-999.0), max_range(999.0)
{
StartValue(DataType().MinValue());
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
StartValue(cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode));
else
StartValue(-DataType().MaxValue());
}
else
{
StartValue(DataType().MinValue());
}
}
std::string ProgramCore() override
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
return " atomic_fetch_max" + postfix
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
@@ -2920,7 +3255,10 @@ public:
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
host_atomic_fetch_max(&destMemory[0], oldValues[tid],
MemoryOrder());
@@ -2936,7 +3274,19 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
for (cl_uint i = 0; i < threadCount; i++)
{
startRefValues[i] = cl_half_from_float(
get_random_float(min_range, max_range, d),
gHalfRoundingMode);
}
}
else if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
for (cl_uint i = 0; i < threadCount; i++)
{
@@ -2963,7 +3313,19 @@ public:
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (whichDestValue == 0)
{
for (cl_uint i = 0; i < threadCount; i++)
{
if (cl_half_to_float(startRefValues[i])
> cl_half_to_float(expected))
expected = startRefValues[i];
}
}
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
@@ -2983,7 +3345,9 @@ public:
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override
{
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
if (std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (whichDestValue == 0)
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
@@ -2998,7 +3362,9 @@ public:
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override
{
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
if (std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
correct = true;
for (cl_uint i = 1; i < threadCount; i++)
@@ -3021,7 +3387,31 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
@@ -3039,7 +3429,10 @@ public:
}
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
return threadCount;
}
@@ -3074,6 +3467,16 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported)
{
CBasicTestFetchMax<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
@@ -3898,6 +4301,9 @@ private:
struct TestDefinition _subCase;
};
#if 0
// The tests below are likely incorrect and have been disabled.
// See https://github.com/KhronosGroup/OpenCL-CTS/issues/2544
static int test_atomic_fence_generic(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements,
bool useSVM)
@@ -3977,3 +4383,4 @@ REGISTER_TEST(svm_atomic_fence)
return test_atomic_fence_generic(device, context, queue, num_elements,
true);
}
#endif

View File

@@ -0,0 +1,13 @@
set(DIRECTX_WRAPPER_SOURCES
directx_wrapper.cpp
)
add_library(directx_wrapper STATIC ${DIRECTX_WRAPPER_SOURCES})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CLConform_INCLUDE_DIR})
if (WIN32)
target_link_libraries(directx_wrapper d3d12)
endif ()

View File

@@ -0,0 +1,71 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "directx_wrapper.hpp"
DirectXWrapper::DirectXWrapper()
{
HRESULT hr = D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL_12_0,
IID_PPV_ARGS(&dx_device));
if (FAILED(hr))
{
throw std::runtime_error("Failed to create DirectX 12 device");
}
D3D12_COMMAND_QUEUE_DESC desc{};
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
hr = dx_device->CreateCommandQueue(&desc, IID_PPV_ARGS(&dx_command_queue));
if (FAILED(hr))
{
throw std::runtime_error("Failed to create DirectX 12 command queue");
}
hr = dx_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT,
IID_PPV_ARGS(&dx_command_allocator));
if (FAILED(hr))
{
throw std::runtime_error(
"Failed to create DirectX 12 command allocator");
}
}
ID3D12Device* DirectXWrapper::getDXDevice() const { return dx_device.Get(); }
ID3D12CommandQueue* DirectXWrapper::getDXCommandQueue() const
{
return dx_command_queue.Get();
}
ID3D12CommandAllocator* DirectXWrapper::getDXCommandAllocator() const
{
return dx_command_allocator.Get();
}
DirectXFenceWrapper::DirectXFenceWrapper(ID3D12Device* dx_device)
: dx_device(dx_device)
{
if (!dx_device)
{
throw std::runtime_error("ID3D12Device is not valid");
}
const HRESULT hr = dx_device->CreateFence(0, D3D12_FENCE_FLAG_SHARED,
IID_PPV_ARGS(&dx_fence));
if (FAILED(hr))
{
throw std::runtime_error("Failed to create the DirectX fence");
}
}

View File

@@ -0,0 +1,47 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#pragma once
#include <d3d12.h>
#include <wrl/client.h>
#include <stdexcept>
using namespace Microsoft::WRL;
class DirectXWrapper {
public:
DirectXWrapper();
ID3D12Device* getDXDevice() const;
ID3D12CommandQueue* getDXCommandQueue() const;
ID3D12CommandAllocator* getDXCommandAllocator() const;
protected:
ComPtr<ID3D12Device> dx_device = nullptr;
ComPtr<ID3D12CommandQueue> dx_command_queue = nullptr;
ComPtr<ID3D12CommandAllocator> dx_command_allocator = nullptr;
};
class DirectXFenceWrapper {
public:
DirectXFenceWrapper(ID3D12Device* dx_device);
ID3D12Fence* operator*() const { return dx_fence.Get(); }
private:
ComPtr<ID3D12Fence> dx_fence = nullptr;
ComPtr<ID3D12Device> dx_device = nullptr;
};

View File

@@ -57,7 +57,7 @@ namespace {
template <typename T>
int verify_degrees(const T *const inptr, const T *const outptr, int n)
{
float error, max_error = 0.0f;
float error, max_error = -INFINITY;
double r, max_val = NAN;
int max_index = 0;
@@ -89,13 +89,16 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n)
}
if (std::is_same<T, half>::value)
log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
max_error, max_index, max_val, conv_to_flt(outptr[max_index]),
max_val, conv_to_flt(outptr[max_index]));
log_info("degrees: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
"vs %g)\n",
max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
conv_to_flt(outptr[max_index]), max_val,
conv_to_flt(outptr[max_index]));
else
log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
max_error, max_index, max_val, outptr[max_index], max_val,
outptr[max_index]);
log_info("degrees: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
"vs %g)\n",
max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
outptr[max_index], max_val, outptr[max_index]);
return 0;
}
@@ -103,7 +106,7 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n)
template <typename T>
int verify_radians(const T *const inptr, const T *const outptr, int n)
{
float error, max_error = 0.0f;
float error, max_error = -INFINITY;
double r, max_val = NAN;
int max_index = 0;
@@ -135,13 +138,16 @@ int verify_radians(const T *const inptr, const T *const outptr, int n)
}
if (std::is_same<T, half>::value)
log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
max_error, max_index, max_val, conv_to_flt(outptr[max_index]),
max_val, conv_to_flt(outptr[max_index]));
log_info("radians: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
"vs %g)\n",
max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
conv_to_flt(outptr[max_index]), max_val,
conv_to_flt(outptr[max_index]));
else
log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
max_error, max_index, max_val, outptr[max_index], max_val,
outptr[max_index]);
log_info("radians: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
"vs %g)\n",
max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
outptr[max_index], max_val, outptr[max_index]);
return 0;
}

View File

@@ -1,5 +1,7 @@
set(MODULE_NAME COMPILER)
find_package(Python3 COMPONENTS Interpreter QUIET)
set(${MODULE_NAME}_SOURCES
main.cpp
test_build_helpers.cpp
@@ -52,7 +54,7 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory
COMMAND ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose
COMMAND ${Python3_EXECUTABLE} ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose
DEPENDS ${COMPILER_ASSEMBLY_SCRIPT} ${COMPILER_ASM}
VERBATIM)

View File

@@ -13,10 +13,62 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <string>
#include <filesystem>
#include "harness/testHarness.h"
#include "harness/stringHelpers.h"
std::string spvBinariesPath = "spirv_bin";
const std::string spvBinariesPathArg = "--spirv-binaries-path";
void printUsage()
{
log_info("Reading SPIR-V files from default '%s' path.\n",
spvBinariesPath.c_str());
log_info("In case you want to set other directory use '%s' argument.\n",
spvBinariesPathArg.c_str());
}
int main(int argc, const char *argv[])
{
bool modifiedSpvBinariesPath = false;
bool listTests = false;
for (int i = 0; i < argc; ++i)
{
int argsRemoveNum = 0;
if (argv[i] == spvBinariesPathArg)
{
if (i + 1 == argc)
{
log_error("Missing value for '%s' argument.\n",
spvBinariesPathArg.c_str());
return TEST_FAIL;
}
else
{
spvBinariesPath = std::string(argv[i + 1]);
argsRemoveNum += 2;
modifiedSpvBinariesPath = true;
}
}
if (argsRemoveNum > 0)
{
for (int j = i; j < (argc - argsRemoveNum); ++j)
argv[j] = argv[j + argsRemoveNum];
argc -= argsRemoveNum;
--i;
}
listTests |= (argv[i] == std::string("--list")
|| argv[i] == std::string("-list"));
}
if (modifiedSpvBinariesPath == false && !listTests)
{
printUsage();
}
return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
test_registry::getInstance().definitions(), false, 0);
}

View File

@@ -24,6 +24,8 @@
#include <unistd.h>
#endif
#include "harness/conversions.h"
#include "harness/stringHelpers.h"
#include "harness/parseParameters.h"
#define MAX_LINE_SIZE_IN_PROGRAM 1024
#define MAX_LOG_SIZE_IN_PROGRAM 2048
@@ -158,6 +160,14 @@ const char *link_static_function_access = // use with compile_static_function
"extern int foo(int, int);\n"
"int access_foo() { int blah = foo(3, 4); return blah + 5; }\n";
const char *multi_build_test_kernel = R"(
__kernel void test_kernel(__global int *dst)
{
int tid = get_global_id(0);
dst[tid] = BUILD_OPT_VAL;
}
)";
static int test_large_single_compile(cl_context context, cl_device_id deviceID,
unsigned int numLines)
{
@@ -3059,12 +3069,13 @@ REGISTER_TEST(execute_after_included_header_link)
}
const auto simple_header_path = temp_dir_path / simple_header_name;
FILE *simple_header_file =
fopen(simple_header_path.u8string().c_str(), "w");
const std::string simple_header_path_str =
to_string(simple_header_path.u8string());
FILE *simple_header_file = fopen(simple_header_path_str.c_str(), "w");
if (simple_header_file == NULL)
{
log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n",
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
simple_header_path_str.c_str(), __FILE__, __LINE__);
return -1;
}
if (fprintf(simple_header_file, "%s", simple_header) < 0)
@@ -3082,7 +3093,7 @@ REGISTER_TEST(execute_after_included_header_link)
}
const std::string include_path =
std::string("-I") + temp_dir_path.generic_u8string();
std::string("-I") + to_string(temp_dir_path.generic_u8string());
error = clCompileProgram(program, 1, &device, include_path.c_str(), 0, NULL,
NULL, NULL, NULL);
test_error(error,
@@ -3931,3 +3942,100 @@ REGISTER_TEST(compile_and_link_status_options_log)
return 0;
}
REGISTER_TEST(multiple_build_program)
{
if (gCompilationMode != kOnline)
{
log_info(
"Skipping multiple_build_program, compilation mode not online\n");
return TEST_SKIPPED_ITSELF;
}
cl_int error = CL_SUCCESS;
const size_t num_threads = num_elements;
clProgramWrapper program = clCreateProgramWithSource(
context, 1, &multi_build_test_kernel, nullptr, &error);
test_error(error, "clCreateProgramWithSource failed");
clMemWrapper out_stream_0 = clCreateBuffer(
context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_threads, NULL, &error);
test_error(error, "clCreateBuffer failed");
clMemWrapper out_stream_1 = clCreateBuffer(
context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_threads, NULL, &error);
test_error(error, "clCreateBuffer failed");
{
/* Build with the macro defined */
error = clBuildProgram(program, 1, &device, "-DBUILD_OPT_VAL=1 ", NULL,
NULL);
test_error(error, "clBuildProgram failed");
clKernelWrapper kernel0 =
clCreateKernel(program, "test_kernel", &error);
test_error(error, "clCreateKernel failed");
error = clSetKernelArg(kernel0, 0, sizeof(out_stream_0), &out_stream_0);
test_error(error, "clSetKernelArg failed");
error = clEnqueueNDRangeKernel(queue, kernel0, 1, NULL, &num_threads,
NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
}
{
/* Rebuild with the macro redefined */
error = clBuildProgram(program, 1, &device, "-DBUILD_OPT_VAL=2 ", NULL,
NULL);
test_error(error, "clBuildProgram failed");
clKernelWrapper kernel1 =
clCreateKernel(program, "test_kernel", &error);
test_error(error, "clCreateKernel failed");
error = clSetKernelArg(kernel1, 0, sizeof(out_stream_1), &out_stream_1);
test_error(error, "clSetKernelArg failed");
error = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &num_threads,
NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
}
error = clFinish(queue);
test_error(error, "clFinish failed");
std::vector<cl_int> test_values(num_threads, 0);
error = clEnqueueReadBuffer(queue, out_stream_0, true, 0,
sizeof(cl_int) * num_threads,
test_values.data(), 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < test_values.size(); i++)
{
if (test_values[i] != 1)
{
log_error("Unexpected test value %d for kernel0 at pos %zu.\n",
test_values[i], i);
return TEST_FAIL;
}
}
error = clEnqueueReadBuffer(queue, out_stream_1, true, 0,
sizeof(cl_int) * num_threads,
test_values.data(), 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < test_values.size(); i++)
{
if (test_values[i] != 2)
{
log_error("Unexpected test value %d for kernel1 at pos %zu.\n",
test_values[i], i);
return TEST_FAIL;
}
}
return TEST_PASS;
}

View File

@@ -94,6 +94,7 @@ const char *known_extensions[] = {
"cl_khr_external_memory_dma_buf",
"cl_khr_command_buffer",
"cl_khr_command_buffer_mutable_dispatch",
"cl_khr_command_buffer_mutable_memory_commands",
"cl_khr_command_buffer_multi_device",
"cl_khr_external_memory_android_hardware_buffer",
"cl_khr_unified_svm",

View File

@@ -32,8 +32,6 @@ const std::string slash = "\\";
#else
const std::string slash = "/";
#endif
std::string compilerSpvBinaries = "test_conformance" + slash + "compiler"
+ slash + "spirv_bin" + slash + "write_kernel.spv";
const std::string spvExt = ".spv";
@@ -338,8 +336,8 @@ public:
std::vector<unsigned char> kernel_buffer;
std::string file_name =
compilerSpvBinaries + std::to_string(address_bits);
std::string file_name = spvBinariesPath + slash + "write_kernel.spv"
+ std::to_string(address_bits);
m_spirv_binary = readBinary(file_name.c_str());
m_spirv_size = m_spirv_binary.size();
}

View File

@@ -1,4 +1,6 @@
#include <array>
#include <string>
extern std::string spvBinariesPath;
static const char write_kernel_source[] = R"(
kernel void write_kernel(global unsigned int *p) {

View File

@@ -15,3 +15,6 @@ add_subdirectory( cl_ext_buffer_device_address )
if(VULKAN_IS_SUPPORTED)
add_subdirectory( cl_khr_external_semaphore )
endif()
if(D3D12_IS_SUPPORTED)
add_subdirectory( cl_khr_external_semaphore_dx_fence )
endif()

View File

@@ -17,6 +17,7 @@ set(${MODULE_NAME}_SOURCES
command_buffer_test_barrier.cpp
command_buffer_test_event_info.cpp
command_buffer_finalize.cpp
command_buffer_pipelined_enqueue.cpp
negative_command_buffer_finalize.cpp
negative_command_buffer_svm_mem.cpp
negative_command_buffer_copy_image.cpp

View File

@@ -27,9 +27,6 @@ BasicCommandBufferTest::BasicCommandBufferTest(cl_device_id device,
: CommandBufferTestBase(device), context(context), queue(nullptr),
num_elements(0), simultaneous_use_support(false),
out_of_order_support(false), queue_out_of_order_support(false),
// try to use simultaneous path by default
simultaneous_use_requested(true),
// due to simultaneous cases extend buffer size
buffer_size_multiplier(1), command_buffer(this)
{
cl_int error = clRetainCommandQueue(queue);
@@ -72,9 +69,8 @@ bool BasicCommandBufferTest::Skip()
sizeof(capabilities), &capabilities, NULL);
test_error(error,
"Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
simultaneous_use_support = simultaneous_use_requested
&& (capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR)
!= 0;
simultaneous_use_support =
(capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR) != 0;
out_of_order_support =
supported_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
device_side_enqueue_support =
@@ -167,19 +163,7 @@ cl_int BasicCommandBufferTest::SetUp(int elements)
error = SetUpKernelArgs();
test_error(error, "SetUpKernelArgs failed");
if (simultaneous_use_support)
{
cl_command_buffer_properties_khr properties[3] = {
CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR,
0
};
command_buffer =
clCreateCommandBufferKHR(1, &queue, properties, &error);
}
else
{
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
}
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS;
@@ -192,11 +176,6 @@ cl_int MultiFlagCreationTest::Run()
// First try to find multiple flags that are supported by the driver and
// device.
if (simultaneous_use_support)
{
flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
if (is_extension_available(
device, CL_KHR_COMMAND_BUFFER_MULTI_DEVICE_EXTENSION_NAME))
{
@@ -207,6 +186,11 @@ cl_int MultiFlagCreationTest::Run()
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
{
flags |= CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_support)
{
flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
}
cl_command_buffer_properties_khr props[] = { CL_COMMAND_BUFFER_FLAGS_KHR,
@@ -381,11 +365,6 @@ cl_int ExplicitFlushTest::Run()
return CL_SUCCESS;
}
bool ExplicitFlushTest::Skip()
{
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
}
cl_int InterleavedEnqueueTest::Run()
{
cl_int error = clCommandNDRangeKernelKHR(
@@ -431,11 +410,6 @@ cl_int InterleavedEnqueueTest::Run()
return CL_SUCCESS;
}
bool InterleavedEnqueueTest::Skip()
{
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
}
cl_int EnqueueAndReleaseTest::Run()
{
cl_int error = clCommandNDRangeKernelKHR(

View File

@@ -78,8 +78,11 @@ protected:
bool queue_out_of_order_support;
bool device_side_enqueue_support;
// user request for simultaneous use
bool simultaneous_use_requested;
// Extends size of created 'in_mem' & 'out_mem' buffers, such that the same
// cl_mem buffer can be used across multiple enqueues of a command-buffer.
// Accessed in the kernel at an offset for each enqueue which is passed as
// a kernel parameter through the 'off_mem' buffer.
// See BasicCommandBufferTest::SetUpKernel() definition.
unsigned buffer_size_multiplier;
clCommandBufferWrapper command_buffer;
};
@@ -116,7 +119,6 @@ struct ExplicitFlushTest : public BasicCommandBufferTest
using BasicCommandBufferTest::BasicCommandBufferTest;
cl_int Run() override;
bool Skip() override;
};
// Test enqueueing a command-buffer twice separated by another enqueue operation
@@ -125,7 +127,6 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest
using BasicCommandBufferTest::BasicCommandBufferTest;
cl_int Run() override;
bool Skip() override;
};
// Test releasing a command-buffer after it has been submitted for execution,
@@ -156,9 +157,9 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
cl_version extension_version =
get_extension_version(device, "cl_khr_command_buffer");
if (extension_version != CL_MAKE_VERSION(0, 9, 7))
if (extension_version != CL_MAKE_VERSION(0, 9, 8))
{
log_info("cl_khr_command_buffer version 0.9.7 is required to run "
log_info("cl_khr_command_buffer version 0.9.8 is required to run "
"the test, skipping.\n ");
return TEST_SKIPPED_ITSELF;
}

View File

@@ -50,13 +50,14 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
virtual cl_int SetUp(int elements) override
{
BasicCommandBufferTest::SetUp(elements);
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp failed");
cl_int error = init_extension_functions();
error = init_extension_functions();
test_error(error, "Unable to initialise extension functions");
cl_command_buffer_properties_khr prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_support)
if (simultaneous_use_requested)
{
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
@@ -90,10 +91,10 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
cl_version extension_version = get_extension_version(
device, "cl_khr_command_buffer_mutable_dispatch");
if (extension_version != CL_MAKE_VERSION(0, 9, 3))
if (extension_version != CL_MAKE_VERSION(0, 9, 4))
{
log_info("cl_khr_command_buffer_mutable_dispatch version "
"0.9.3 is "
"0.9.4 is "
"required to run the test, skipping.\n ");
extension_avaliable = false;
}
@@ -128,6 +129,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
}
clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
bool simultaneous_use_requested = false;
const char* kernelString = "__kernel void empty() {}";
const size_t global_work_size = 4 * 16;

View File

@@ -70,9 +70,9 @@ struct MutableDispatchImage1DArguments : public BasicMutableCommandBufferTest
{
int offset = get_global_id(0);
int4 color = read_imagei( source, sampler, offset );
uint4 color = read_imageui( source, sampler, offset );
write_imagei( dest, offset, color );
write_imageui( dest, offset, color );
})";
cl_int error;
@@ -260,9 +260,9 @@ struct MutableDispatchImage2DArguments : public BasicMutableCommandBufferTest
int x = get_global_id(0);
int y = get_global_id(1);
int4 color = read_imagei( source, sampler, (int2) (x, y) );
uint4 color = read_imageui( source, sampler, (int2) (x, y) );
write_imagei( dest, (int2) (x, y), color );
write_imageui( dest, (int2) (x, y), color );
})";
cl_int error;

View File

@@ -35,9 +35,7 @@ struct IterativeArgUpdateDispatch : BasicMutableCommandBufferTest
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
command(nullptr)
{
simultaneous_use_requested = false;
}
{}
bool Skip() override
{

View File

@@ -33,9 +33,7 @@ struct MultipleCommandsDispatch : BasicMutableCommandBufferTest
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
command_pri(nullptr), command_sec(nullptr)
{
simultaneous_use_requested = false;
}
{}
bool Skip() override
{
@@ -47,7 +45,7 @@ struct MultipleCommandsDispatch : BasicMutableCommandBufferTest
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
// require mutable arguments capabillity
// require mutable arguments capability
return !mutable_support;
}

View File

@@ -34,9 +34,7 @@ struct OverwriteUpdateDispatch : BasicMutableCommandBufferTest
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
command(nullptr)
{
simultaneous_use_requested = false;
}
{}
bool Skip() override
{

View File

@@ -21,10 +21,12 @@
#include <CL/cl.h>
#include <CL/cl_ext.h>
////////////////////////////////////////////////////////////////////////////////
// mutable dispatch tests which handle following cases:
// - out-of-order queue use
// mutable dispatch tests which handles
// - out-of-order queue with dependencies between command-buffer enqueues
// - out-of-order queue with simultaneous use
// - in-order queue with dependencies between command-buffer enqueues
// - in-order queue with simultaneous use
// - cross queue with dependencies between command-buffer enqueues
// - cross-queue with simultaneous use
namespace {
@@ -35,11 +37,10 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
SimultaneousMutableDispatchTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
work_queue(nullptr), work_command_buffer(this), user_event(nullptr),
wait_pass_event(nullptr), command(nullptr)
work_queue(nullptr), work_command_buffer(this), new_in_mem(nullptr),
command(nullptr)
{
simultaneous_use_requested = simultaneous_request;
if (simultaneous_request) buffer_size_multiplier = 2;
}
cl_int SetUpKernel() override
@@ -48,26 +49,36 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
// create additional kernel to properly prepare output buffer for test
const char* kernel_str =
const char *kernel_str =
R"(
__kernel void fill(int pattern, __global int* out, __global int*
offset)
__kernel void mul(__global int* out, __global int* in, int mul_val)
{
size_t id = get_global_id(0);
size_t ind = offset[0] + id ;
out[ind] = pattern;
out[id] = in[id] * mul_val;
})";
error = create_single_kernel_helper_create_program(
context, &program_fill, 1, &kernel_str);
context, &program_mul, 1, &kernel_str);
test_error(error, "Failed to create program with source");
error =
clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr);
clBuildProgram(program_mul, 1, &device, nullptr, nullptr, nullptr);
test_error(error, "Failed to build program");
kernel_fill = clCreateKernel(program_fill, "fill", &error);
test_error(error, "Failed to create copy kernel");
kernel_mul = clCreateKernel(program_mul, "mul", &error);
test_error(error, "Failed to create multiply kernel");
new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
new_in_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
return CL_SUCCESS;
}
@@ -77,14 +88,13 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed");
error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int),
&overwritten_pattern);
error = clSetKernelArg(kernel_mul, 0, sizeof(out_mem), &out_mem);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem);
error = clSetKernelArg(kernel_mul, 1, sizeof(off_mem), &in_mem);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem);
error = clSetKernelArg(kernel_mul, 2, sizeof(cl_int), &pattern_pri);
test_error(error, "clSetKernelArg failed");
return CL_SUCCESS;
@@ -101,30 +111,28 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
&error);
test_error(error, "Unable to create command queue to test with");
cl_command_buffer_properties_khr prop =
CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_support)
{
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
const cl_command_buffer_properties_khr props[] = {
CL_COMMAND_BUFFER_FLAGS_KHR,
prop,
0,
};
work_command_buffer =
clCreateCommandBufferKHR(1, &work_queue, props, &error);
test_error(error, "clCreateCommandBufferKHR failed");
}
else
{
work_queue = queue;
work_command_buffer = command_buffer;
}
cl_command_buffer_properties_khr prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_requested)
{
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
const cl_command_buffer_properties_khr props[] = {
CL_COMMAND_BUFFER_FLAGS_KHR,
prop,
0,
};
work_command_buffer =
clCreateCommandBufferKHR(1, &work_queue, props, &error);
test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS;
}
@@ -145,293 +153,245 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|| !mutable_support;
}
cl_int RecordCommandBuffer()
{
cl_int error = clCommandNDRangeKernelKHR(
work_command_buffer, nullptr, nullptr, kernel_mul, 1, nullptr,
&num_elements, nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(work_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
cl_int RunSerializedPass(std::vector<cl_int> &first_enqueue_output,
std::vector<cl_int> &second_enqueue_output)
{
/* Serialize command-buffer enqueue, is a linear sequence of
* commands, with dependencies enforced using an in-order queue
* or cl_event dependencies.
*
* 1. Fill input buffer
* 2. Enqueue command-buffer doing: `output = a * input;
* 3. Read output buffer to host data so it can be verified later
* - Update command to new input buffer, new `a` val and use output
* buffer from previous invocation as new input buffer.
* 4. Enqueue command-buffer again.
* 5. Read new output buffer back to host data so it can be verified
* later
*
*/
clEventWrapper E[4];
cl_int error = clEnqueueFillBuffer(
work_queue, in_mem, &pattern_fill, sizeof(cl_int), 0, data_size(),
0, nullptr, (out_of_order_request ? &E[0] : nullptr));
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[0] : nullptr),
(out_of_order_request ? &E[1] : nullptr));
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(work_queue, out_mem, CL_FALSE, 0,
data_size(), first_enqueue_output.data(),
(out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[1] : nullptr),
(out_of_order_request ? &E[2] : nullptr));
test_error(error, "clEnqueueReadBuffer failed");
cl_mutable_dispatch_arg_khr arg_1{ 0, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr arg_2{ 1, sizeof(cl_mem), &out_mem };
cl_mutable_dispatch_arg_khr arg_3{ 2, sizeof(cl_int), &pattern_sec };
cl_mutable_dispatch_arg_khr args[] = { arg_1, arg_2, arg_3 };
cl_mutable_dispatch_config_khr dispatch_config{
command,
3 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void* configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[2] : nullptr),
(out_of_order_request ? &E[3] : nullptr));
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
work_queue, new_out_mem, CL_FALSE, 0, data_size(),
second_enqueue_output.data(), (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[3] : nullptr), nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
cl_int RunSimultaneousPass(std::vector<cl_int> &first_enqueue_output,
std::vector<cl_int> &second_enqueue_output)
{
/* Simultaneous command-buffer pass enqueues a command-buffer twice
* without dependencies between the enqueues, but an update so that
* all the parameters are different to avoid race conditions in the
* kernel execution. The asynchronous task graph looks like:
*
* (Fill input A buffer) (Fill input B buffer)
* | |
* (Enqueue command_buffer) (Enqueue updated command_buffer)
* | |
* (Read output A buffer) (Read output B buffer)
*/
clEventWrapper E[4];
cl_int error = clEnqueueFillBuffer(
work_queue, in_mem, &pattern_fill, sizeof(cl_int), 0, data_size(),
0, nullptr, (out_of_order_request ? &E[0] : nullptr));
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueFillBuffer(work_queue, new_in_mem, &pattern_fill_2,
sizeof(cl_int), 0, data_size(), 0, nullptr,
(out_of_order_request ? &E[1] : nullptr));
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[0] : nullptr),
(out_of_order_request ? &E[2] : nullptr));
test_error(error, "clEnqueueCommandBufferKHR failed");
cl_mutable_dispatch_arg_khr arg_1{ 0, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr arg_2{ 1, sizeof(cl_mem), &new_in_mem };
cl_mutable_dispatch_arg_khr arg_3{ 2, sizeof(cl_int), &pattern_sec };
cl_mutable_dispatch_arg_khr args[] = { arg_1, arg_2, arg_3 };
cl_mutable_dispatch_config_khr dispatch_config{
command,
3 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void* configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[1] : nullptr),
(out_of_order_request ? &E[3] : nullptr));
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
work_queue, out_mem, CL_FALSE, 0, data_size(),
first_enqueue_output.data(), (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[2] : nullptr), nullptr);
test_error(error, "clEnqueueReadBuffer failed");
error = clEnqueueReadBuffer(
work_queue, new_out_mem, CL_FALSE, 0, data_size(),
second_enqueue_output.data(), (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[3] : nullptr), nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
cl_int VerifySerializedPass(std::vector<cl_int> &first_enqueue_output,
std::vector<cl_int> &second_enqueue_output)
{
const cl_int first_enqueue_ref = pattern_pri * pattern_fill;
const cl_int second_enqueue_ref = pattern_sec * first_enqueue_ref;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(first_enqueue_ref, first_enqueue_output[i],
i);
CHECK_VERIFICATION_ERROR(second_enqueue_ref,
second_enqueue_output[i], i);
}
return CL_SUCCESS;
}
cl_int VerifySimultaneousPass(std::vector<cl_int> &first_enqueue_output,
std::vector<cl_int> &second_enqueue_output)
{
const cl_int first_enqueue_ref = pattern_pri * pattern_fill;
const cl_int second_enqueue_ref = pattern_sec * pattern_fill_2;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(first_enqueue_ref, first_enqueue_output[i],
i);
CHECK_VERIFICATION_ERROR(second_enqueue_ref,
second_enqueue_output[i], i);
}
return CL_SUCCESS;
}
cl_int Run() override
{
cl_int error = CL_SUCCESS;
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
if (simultaneous_use_support)
std::vector<cl_int> first_enqueue_output(num_elements);
std::vector<cl_int> second_enqueue_output(num_elements);
if (simultaneous_use_requested)
{
// enqueue simultaneous command-buffers with out-of-order calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
error = RunSimultaneousPass(first_enqueue_output,
second_enqueue_output);
test_error(error, "RunSimultaneousPass failed");
}
else
{
// enqueue single command-buffer with out-of-order calls
error = RunSingle();
test_error(error, "RunSingle failed");
error =
RunSerializedPass(first_enqueue_output, second_enqueue_output);
test_error(error, "RunSerializedPass failed");
}
return CL_SUCCESS;
}
cl_int RecordCommandBuffer()
{
cl_sync_point_khr sync_points[2];
const cl_int pattern = pattern_pri;
cl_int error = clCommandFillBufferKHR(
work_command_buffer, nullptr, nullptr, in_mem, &pattern,
sizeof(cl_int), 0, data_size(), 0, nullptr, &sync_points[0],
nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandFillBufferKHR(work_command_buffer, nullptr, nullptr,
out_mem, &overwritten_pattern,
sizeof(cl_int), 0, data_size(), 0,
nullptr, &sync_points[1], nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandNDRangeKernelKHR(
work_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
&num_elements, nullptr, 2, sync_points, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(work_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
cl_int RunSingle()
{
cl_int error;
error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
error = clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 0,
nullptr, &single_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
std::vector<cl_int> output_data(num_elements);
error =
clEnqueueReadBuffer(work_queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 1, &single_event, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
}
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr args[] = { arg_1 };
cl_mutable_dispatch_config_khr dispatch_config{
command,
1 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void* configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
error = clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 0,
nullptr, &single_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_TRUE, 0,
data_size(), output_data.data(), 1,
&single_event, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
}
return CL_SUCCESS;
}
cl_int RecordSimultaneousCommandBuffer()
{
cl_sync_point_khr sync_points[2];
// for both simultaneous passes this call will fill entire in_mem buffer
cl_int error = clCommandFillBufferKHR(
work_command_buffer, nullptr, nullptr, in_mem, &pattern_pri,
sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
&sync_points[0], nullptr);
test_error(error, "clCommandFillBufferKHR failed");
// to avoid overwriting the entire result buffer instead of filling
// only relevant part this additional kernel was introduced
error = clCommandNDRangeKernelKHR(
work_command_buffer, nullptr, nullptr, kernel_fill, 1, nullptr,
&num_elements, nullptr, 0, nullptr, &sync_points[1], &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clCommandNDRangeKernelKHR(
work_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
&num_elements, nullptr, 2, sync_points, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(work_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
struct SimulPassData
{
cl_int offset;
std::vector<cl_int> output_buffer;
std::vector<cl_int> updated_output_buffer;
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
clEventWrapper wait_events[3];
};
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{
cl_int error = CL_SUCCESS;
if (!user_event)
{
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
pd.wait_events[0] = user_event;
// filling offset buffer must wait for previous pass completeness
error = clEnqueueFillBuffer(
work_queue, off_mem, &pd.offset, sizeof(cl_int), 0, sizeof(cl_int),
(wait_pass_event != nullptr ? 1 : 0),
(wait_pass_event != nullptr ? &wait_pass_event : nullptr),
&pd.wait_events[1]);
test_error(error, "clEnqueueFillBuffer failed");
// command buffer execution must wait for two wait-events
error =
clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 2,
&pd.wait_events[0], &pd.wait_events[2]);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(work_queue, out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 1,
&pd.wait_events[2], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
// Retain new output memory object until the end of the test.
retained_output_buffers.push_back(new_out_mem);
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr args[] = { arg_1 };
cl_mutable_dispatch_config_khr dispatch_config{
command,
1 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void* configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
// command buffer execution must wait for two wait-events
error =
clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 2,
&pd.wait_events[0], &pd.wait_events[2]);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(),
pd.updated_output_buffer.data(), 1,
&pd.wait_events[2], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
cl_int RunSimultaneous()
{
cl_int error = RecordSimultaneousCommandBuffer();
test_error(error, "RecordSimultaneousCommandBuffer failed");
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ 0, std::vector<cl_int>(num_elements),
std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements),
std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueueSimultaneousPass failed");
wait_pass_event = pass.wait_events[2];
}
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(work_queue);
test_error(error, "clFinish failed");
// verify the result buffers
auto& first_pass_output = simul_passes[0].output_buffer;
auto& first_pass_updated_output = simul_passes[0].updated_output_buffer;
auto& second_pass_output = simul_passes[1].output_buffer;
auto& second_pass_updated_output =
simul_passes[1].updated_output_buffer;
for (size_t i = 0; i < num_elements; i++)
if (simultaneous_use_requested)
{
// First pass:
// Before updating, out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_output[i], i);
// After updating, new_out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_updated_output[i],
i);
// Second pass:
// Before updating, out_mem is filled with overwritten_pattern
CHECK_VERIFICATION_ERROR(overwritten_pattern, second_pass_output[i],
i);
// After updating, new_out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, second_pass_updated_output[i],
i);
error = VerifySimultaneousPass(first_enqueue_output,
second_enqueue_output);
test_error(error, "VerifySimultaneousPass failed");
}
else
{
error = VerifySerializedPass(first_enqueue_output,
second_enqueue_output);
test_error(error, "VerifySerializedPass failed");
}
return CL_SUCCESS;
@@ -440,22 +400,20 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
clCommandQueueWrapper work_queue;
clCommandBufferWrapper work_command_buffer;
clEventWrapper user_event;
clEventWrapper single_event;
clEventWrapper wait_pass_event;
clKernelWrapper kernel_mul;
clProgramWrapper program_mul;
clKernelWrapper kernel_fill;
clProgramWrapper program_fill;
clMemWrapper new_out_mem, new_in_mem;
std::vector<clMemWrapper> retained_output_buffers;
const size_t test_global_work_size = 3 * sizeof(cl_int);
const cl_int pattern_pri = 42;
const cl_int pattern_sec = 0xACDC;
const cl_int pattern_fill = 0xA;
const cl_int pattern_fill_2 = -3;
const cl_int overwritten_pattern = 0xACDC;
cl_mutable_command_khr command;
};
template <bool simultaneous_use_request>
struct CrossQueueSimultaneousMutableDispatchTest
: public BasicMutableCommandBufferTest
{
@@ -463,9 +421,9 @@ struct CrossQueueSimultaneousMutableDispatchTest
cl_context context,
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
queue_sec(nullptr), command(nullptr)
queue_sec(nullptr), new_out_mem(nullptr), command(nullptr)
{
simultaneous_use_requested = true;
simultaneous_use_requested = simultaneous_use_request;
}
cl_int SetUpKernel() override
@@ -488,6 +446,11 @@ struct CrossQueueSimultaneousMutableDispatchTest
kernel = clCreateKernel(program, "fill", &error);
test_error(error, "Failed to create copy kernel");
new_out_mem =
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements, nullptr, &error);
test_error(error, "clCreateBuffer failed");
return CL_SUCCESS;
}
@@ -530,24 +493,18 @@ struct CrossQueueSimultaneousMutableDispatchTest
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
return !simultaneous_use_support || !mutable_support;
return (simultaneous_use_requested && !simultaneous_use_support)
|| !mutable_support;
}
cl_int Run() override
{
// record command buffer
cl_int pattern = 0;
cl_int error = clCommandFillBufferKHR(
command_buffer, nullptr, nullptr, out_mem, &pattern, sizeof(cl_int),
0, data_size(), 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandFillBufferKHR failed");
cl_command_properties_khr props[] = {
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
};
error = clCommandNDRangeKernelKHR(
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -555,16 +512,15 @@ struct CrossQueueSimultaneousMutableDispatchTest
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
// enqueue command buffer to default queue
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
// If we are testing not using simultaneous-use then we need to use
// an event to serialize the execution order to the command-buffer
// submission to each queue.
clEventWrapper E;
error = clEnqueueCommandBufferKHR(
0, nullptr, command_buffer, 0, nullptr,
(simultaneous_use_requested ? nullptr : &E));
test_error(error, "clEnqueueCommandBufferKHR failed");
// update mutable parameters
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
data_size(), nullptr, &error);
test_error(error, "clCreateBuffer failed");
cl_mutable_dispatch_arg_khr arg_0{ 0, sizeof(cl_int), &pattern_sec };
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem };
@@ -594,30 +550,35 @@ struct CrossQueueSimultaneousMutableDispatchTest
test_error(error, "clUpdateMutableCommandsKHR failed");
// enqueue command buffer to non-default queue
error = clEnqueueCommandBufferKHR(1, &queue_sec, command_buffer, 0,
nullptr, nullptr);
error = clEnqueueCommandBufferKHR(
1, &queue_sec, command_buffer, (simultaneous_use_requested ? 0 : 1),
(simultaneous_use_requested ? nullptr : &E), nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clFinish(queue_sec);
test_error(error, "clFinish failed");
// read result of command buffer execution
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
std::vector<cl_int> sec_output_data(num_elements);
error =
clEnqueueReadBuffer(queue_sec, new_out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
sec_output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
// verify the result
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_sec, output_data[i], i);
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
CHECK_VERIFICATION_ERROR(pattern_sec, sec_output_data[i], i);
}
return CL_SUCCESS;
}
clCommandQueueWrapper queue_sec;
clMemWrapper new_out_mem;
const cl_int pattern_pri = 42;
const cl_int pattern_sec = 0xACDC;
cl_mutable_command_khr command;
@@ -637,14 +598,26 @@ REGISTER_TEST(mutable_dispatch_simultaneous_out_of_order)
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_in_order)
{
return MakeAndRunTest<SimultaneousMutableDispatchTest<false, false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_simultaneous_in_order)
{
return MakeAndRunTest<SimultaneousMutableDispatchTest<true, false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_simultaneous_cross_queue)
REGISTER_TEST(mutable_dispatch_cross_queue)
{
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest>(
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest<false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_simultaneous_cross_queue)
{
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest<true>>(
device, context, queue, num_elements);
}

View File

@@ -79,11 +79,7 @@ struct CommandBufferEventSync : public BasicCommandBufferTest
: BasicCommandBufferTest(device, context, queue),
command_buffer_sec(this), kernel_sec(nullptr), in_mem_sec(nullptr),
out_mem_sec(nullptr), off_mem_sec(nullptr), test_event(nullptr)
{
simultaneous_use_requested =
(event_mode == EventMode::RET_COMBUF_WAIT_FOR_COMBUF) ? true
: false;
}
{}
//--------------------------------------------------------------------------
cl_int SetUpKernel() override
@@ -159,9 +155,6 @@ struct CommandBufferEventSync : public BasicCommandBufferTest
{
if (BasicCommandBufferTest::Skip()) return true;
if (simultaneous_use_requested && !simultaneous_use_support)
return true;
if (out_of_order_requested && !out_of_order_support) return true;
return false;

View File

@@ -48,6 +48,39 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
: BasicCommandBufferTest(device, context, queue)
{}
bool Skip() override
{
if (BasicCommandBufferTest::Skip()) return true;
if (test_mode == CombufInfoTestMode::CITM_PROP_ARRAY)
{
return !simultaneous_use_support
|| !(is_extension_available(
device,
CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME));
}
return false;
}
cl_int SetUp(int elements) override
{
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp() failed");
if (test_mode == CombufInfoTestMode::CITM_PROP_ARRAY)
{
cl_command_buffer_properties_khr properties[3] = {
CL_COMMAND_BUFFER_FLAGS_KHR,
CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR, 0
};
command_buffer =
clCreateCommandBufferKHR(1, &queue, properties, &error);
test_error(error, "clCreateCommandBufferKHR failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int Run() override
{
@@ -237,33 +270,6 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
test_error(error, "verify_state failed");
error = clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
clEventWrapper trigger_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
clEventWrapper execute_event;
// enqueued command buffer blocked on user event
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&trigger_event, &execute_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
// execute command buffer
cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
test_error(error, "verify_state failed");
test_error(signal_error, "clSetUserEventStatus failed");
error = clWaitForEvents(1, &execute_event);
test_error(error, "Unable to wait for execute event");
// verify executable state
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
test_error(error, "verify_state failed");
return CL_SUCCESS;
}

View File

@@ -21,11 +21,9 @@
namespace {
////////////////////////////////////////////////////////////////////////////////
// out-of-order tests for cl_khr_command_buffer which handles below cases:
// -test case for out-of-order command-buffer
// -test an out-of-order command-buffer with simultaneous use
// Tests for cl_khr_command_buffer which handles submitting a command-buffer to
// an out-of-order queue.
template <bool simultaneous_request>
struct OutOfOrderTest : public BasicCommandBufferTest
{
OutOfOrderTest(cl_device_id device, cl_context context,
@@ -35,18 +33,11 @@ struct OutOfOrderTest : public BasicCommandBufferTest
user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr),
program_fill(nullptr)
{
simultaneous_use_requested = simultaneous_request;
if (simultaneous_request) buffer_size_multiplier = 2;
buffer_size_multiplier = 2; // two enqueues of command-buffer
}
//--------------------------------------------------------------------------
cl_int SetUpKernel() override
{
// if device doesn't support simultaneous use which was requested
// we can skip creation of OCL resources
if (simultaneous_use_requested && !simultaneous_use_support)
return CL_SUCCESS;
cl_int error = BasicCommandBufferTest::SetUpKernel();
test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
@@ -74,14 +65,8 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int SetUpKernelArgs() override
{
// if device doesn't support simultaneous use which was requested
// we can skip creation of OCL resources
if (simultaneous_use_requested && !simultaneous_use_support)
return CL_SUCCESS;
cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed");
@@ -98,7 +83,6 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override
{
cl_int error = BasicCommandBufferTest::SetUp(elements);
@@ -108,110 +92,23 @@ struct OutOfOrderTest : public BasicCommandBufferTest
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
test_error(error, "Unable to create command queue to test with");
cl_command_buffer_properties_khr properties[3] = {
CL_COMMAND_BUFFER_FLAGS_KHR, 0, 0
};
if (simultaneous_use_requested && simultaneous_use_support)
properties[1] = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
out_of_order_command_buffer = clCreateCommandBufferKHR(
1, &out_of_order_queue, properties, &error);
out_of_order_command_buffer =
clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
bool Skip() override
{
if (BasicCommandBufferTest::Skip()) return true;
if (!out_of_order_support
|| (simultaneous_use_requested && !simultaneous_use_support))
return true;
return false;
return !out_of_order_support;
}
//--------------------------------------------------------------------------
cl_int Run() override
{
cl_int error = CL_SUCCESS;
if (simultaneous_use_support)
{
// enqueue simultaneous command-buffers with out-of-order calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
}
else
{
// enqueue single command-buffer with out-of-order calls
error = RunSingle();
test_error(error, "RunSingle failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
cl_int RecordCommandBuffer() const
{
cl_sync_point_khr sync_points[2];
const cl_int pattern = pattern_pri;
cl_int error = clCommandFillBufferKHR(
out_of_order_command_buffer, nullptr, nullptr, in_mem, &pattern,
sizeof(cl_int), 0, data_size(), 0, nullptr, &sync_points[0],
nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
nullptr, out_mem, &overwritten_pattern,
sizeof(cl_int), 0, data_size(), 0,
nullptr, &sync_points[1], nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandNDRangeKernelKHR(
out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
&num_elements, nullptr, 2, sync_points, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 0, nullptr, &user_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
data_size(), output_data.data(), 1,
&user_event, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordSimultaneousCommandBuffer() const
{
cl_sync_point_khr sync_points[2];
// for both simultaneous passes this call will fill entire in_mem buffer
// fill entire in_mem buffer
cl_int error = clCommandFillBufferKHR(
out_of_order_command_buffer, nullptr, nullptr, in_mem, &pattern_pri,
sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
@@ -236,79 +133,63 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
struct EnqueuePassData
{
cl_int offset;
std::vector<cl_int> output_buffer;
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
clEventWrapper wait_events[3];
// 0: offset-buffer fill event, 1: kernel done event
clEventWrapper wait_events[2];
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
cl_int EnqueuePass(EnqueuePassData& pd)
{
cl_int error = CL_SUCCESS;
if (!user_event)
{
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
pd.wait_events[0] = user_event;
// filling offset buffer must wait for previous pass completeness
error = clEnqueueFillBuffer(
cl_int error = clEnqueueFillBuffer(
out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0,
sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0),
(wait_pass_event != nullptr ? &wait_pass_event : nullptr),
&pd.wait_events[1]);
&pd.wait_events[0]);
test_error(error, "clEnqueueFillBuffer failed");
// command buffer execution must wait for two wait-events
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0],
&pd.wait_events[2]);
0, nullptr, out_of_order_command_buffer, 1, &pd.wait_events[0],
&pd.wait_events[1]);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 1,
&pd.wait_events[2], nullptr);
&pd.wait_events[1], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
cl_int Run() override
{
cl_int error = RecordSimultaneousCommandBuffer();
test_error(error, "RecordSimultaneousCommandBuffer failed");
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
std::vector<EnqueuePassData> enqueue_passes = {
{ 0, std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
for (auto&& pass : enqueue_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueueSimultaneousPass failed");
error = EnqueuePass(pass);
test_error(error, "EnqueuePass failed");
wait_pass_event = pass.wait_events[2];
wait_pass_event = pass.wait_events[1];
}
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(out_of_order_queue);
test_error(error, "clFinish failed");
// verify the result buffers
for (auto&& pass : simul_passes)
for (auto&& pass : enqueue_passes)
{
auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++)
@@ -320,7 +201,6 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
clCommandQueueWrapper out_of_order_queue;
clCommandBufferWrapper out_of_order_command_buffer;
@@ -338,12 +218,5 @@ struct OutOfOrderTest : public BasicCommandBufferTest
REGISTER_TEST(out_of_order)
{
return MakeAndRunTest<OutOfOrderTest<false>>(device, context, queue,
num_elements);
}
REGISTER_TEST(simultaneous_out_of_order)
{
return MakeAndRunTest<OutOfOrderTest<true>>(device, context, queue,
num_elements);
return MakeAndRunTest<OutOfOrderTest>(device, context, queue, num_elements);
}

View File

@@ -0,0 +1,321 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "basic_command_buffer.h"
#include <vector>
namespace {
////////////////////////////////////////////////////////////////////////////////
// Tests for multiple sequential submissions of a command-buffer without a
// blocking wait between them, but using the following mechanisms to serialize
// execution of the submissions.
// * In-order queue dependencies
// * Event dependencies in command-buffer submissions to an out-of-order queue
// * Barrier submissions between command-buffer submissions to an out-of-order
// queue
// Base class that individual test fixtures are derived from
// Base fixture shared by the pipelining tests below. Builds a program with
// two kernels ("mul_by_val" and "increment"), allocates the output buffer,
// and records a command-buffer containing a single increment dispatch.
// Derived fixtures submit that command-buffer twice with a mul_by_val
// dispatch in between, serialized by different mechanisms.
struct CommandBufferPipelined : public BasicCommandBufferTest
{
    CommandBufferPipelined(cl_device_id device, cl_context context,
                           cl_command_queue queue)
        : BasicCommandBufferTest(device, context, queue)
    {}

    // Compile the embedded program and create both kernel objects.
    cl_int SetUpKernel() override
    {
        const char* mul_kernel_str =
            R"(
            __kernel void mul_by_val(int in, __global int* data)
            {
                size_t id = get_global_id(0);
                data[id] *= in;
            }

            __kernel void increment(__global int* data)
            {
                size_t id = get_global_id(0);
                data[id]++;
            })";
        cl_int error = create_single_kernel_helper_create_program(
            context, &program, 1, &mul_kernel_str);
        test_error(error, "Failed to create program with source");

        error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
        test_error(error, "Failed to build program");

        mul_kernel = clCreateKernel(program, "mul_by_val", &error);
        test_error(error, "Failed to create mul_by_val kernel");

        inc_kernel = clCreateKernel(program, "increment", &error);
        test_error(error, "Failed to create increment kernel");

        return CL_SUCCESS;
    }

    // Allocate out_mem and bind kernel arguments. mul_by_val's scalar
    // argument is fixed to `pattern` (42) for the whole test.
    cl_int SetUpKernelArgs() override
    {
        cl_int error = CL_SUCCESS;
        out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                 num_elements * buffer_size_multiplier
                                     * sizeof(cl_int),
                                 nullptr, &error);
        test_error(error, "clCreateBuffer failed");

        cl_int val_arg = pattern;
        error = clSetKernelArg(mul_kernel, 0, sizeof(cl_int), &val_arg);
        test_error(error, "clSetKernelArg failed");
        error = clSetKernelArg(mul_kernel, 1, sizeof(out_mem), &out_mem);
        test_error(error, "clSetKernelArg failed");
        error = clSetKernelArg(inc_kernel, 0, sizeof(out_mem), &out_mem);
        test_error(error, "clSetKernelArg failed");

        return CL_SUCCESS;
    }

    // Record a single increment dispatch into cmd_buf, finalize it, then
    // zero-fill data_size() bytes of out_mem so every run starts from a
    // known state. The clFinish makes the fill visible before Run() begins.
    // NOTE(review): assumes data_size() covers the portion of out_mem the
    // derived tests read back — confirm against BasicCommandBufferTest.
    cl_int RecordCommandBuffer(clCommandBufferWrapper& cmd_buf)
    {
        cl_int error = clCommandNDRangeKernelKHR(
            cmd_buf, nullptr, nullptr, inc_kernel, 1, nullptr, &num_elements,
            nullptr, 0, nullptr, nullptr, nullptr);
        test_error(error, "clCommandNDRangeKernelKHR failed");

        error = clFinalizeCommandBufferKHR(cmd_buf);
        test_error(error, "clFinalizeCommandBufferKHR failed");

        // Zero initialize buffer before starting test
        cl_int zero_pattern = 0;
        error =
            clEnqueueFillBuffer(queue, out_mem, &zero_pattern, sizeof(cl_int),
                                0, data_size(), 0, nullptr, nullptr);
        test_error(error, "clEnqueueFillBuffer failed");
        error = clFinish(queue);
        test_error(error, "clFinish failed");

        return CL_SUCCESS;
    }

    // Multiplier used by mul_by_val; tests verify out_mem == pattern + 1,
    // i.e. (0 + 1) * pattern + 1 after increment -> mul -> increment.
    const cl_int pattern = 42;
    clKernelWrapper inc_kernel = nullptr;
    clKernelWrapper mul_kernel = nullptr;
};
// Pipelines two submissions of the command-buffer around a mul_by_val
// dispatch on the default in-order queue. No events or barriers are used:
// the queue's implicit ordering alone serializes
// increment -> mul_by_val -> increment.
struct InOrderPipelined : public CommandBufferPipelined
{
    InOrderPipelined(cl_device_id device, cl_context context,
                     cl_command_queue queue)
        : CommandBufferPipelined(device, context, queue)
    {}

    cl_int Run() override
    {
        cl_int error = RecordCommandBuffer(command_buffer);
        test_error(error, "RecordCommandBuffer failed");

        // First submission: increments the zero-filled buffer to all 1s.
        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
                                          nullptr, nullptr);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        // Multiply by `pattern`; in-order queue guarantees this runs after
        // the first command-buffer submission without an explicit event.
        error =
            clEnqueueNDRangeKernel(queue, mul_kernel, 1, nullptr, &num_elements,
                                   nullptr, 0, nullptr, nullptr);
        test_error(error, "clEnqueueNDRangeKernel failed");

        // Second submission of the same command-buffer, again with no
        // explicit synchronization between enqueues.
        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
                                          nullptr, nullptr);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        // Blocking read doubles as the final synchronization point.
        std::vector<cl_int> output_data(num_elements);
        error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
                                    output_data.data(), 0, nullptr, nullptr);
        test_error(error, "clEnqueueReadBuffer failed");

        // Verify: (0 + 1) * pattern + 1 == pattern + 1 in every element.
        const cl_int ref = pattern + 1;
        for (size_t i = 0; i < num_elements; i++)
        {
            CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
        }

        return CL_SUCCESS;
    }
};
// Pipelines command-buffer submissions on an out-of-order queue, using an
// explicit event chain (events[0] -> events[1] -> events[2]) to serialize
// increment -> mul_by_val -> increment -> read-back.
struct EventPipelined : public CommandBufferPipelined
{
    EventPipelined(cl_device_id device, cl_context context,
                   cl_command_queue queue)
        : CommandBufferPipelined(device, context, queue),
          out_of_order_queue(nullptr), out_of_order_command_buffer(this)
    {}

    // Requires out-of-order queue support in addition to the base checks.
    bool Skip() override
    {
        return CommandBufferPipelined::Skip() || !out_of_order_support;
    }

    // Create the out-of-order queue and a command-buffer targeting it.
    cl_int SetUp(int elements) override
    {
        cl_int error = CommandBufferPipelined::SetUp(elements);
        test_error(error, "EventPipelined::SetUp failed");

        out_of_order_queue = clCreateCommandQueue(
            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
        test_error(error, "Unable to create command queue to test with");

        out_of_order_command_buffer =
            clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
        test_error(error, "clCreateCommandBufferKHR failed");

        return CL_SUCCESS;
    }

    cl_int Run() override
    {
        cl_int error = RecordCommandBuffer(out_of_order_command_buffer);
        test_error(error, "RecordCommandBuffer failed");

        // First submission; completion signaled through events[0].
        error = clEnqueueCommandBufferKHR(
            0, nullptr, out_of_order_command_buffer, 0, nullptr, &events[0]);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        // mul_by_val waits on events[0] and signals events[1].
        error = clEnqueueNDRangeKernel(out_of_order_queue, mul_kernel, 1,
                                       nullptr, &num_elements, nullptr, 1,
                                       &events[0], &events[1]);
        test_error(error, "clEnqueueNDRangeKernel failed");

        // Second submission waits on events[1] and signals events[2].
        error = clEnqueueCommandBufferKHR(
            0, nullptr, out_of_order_command_buffer, 1, &events[1], &events[2]);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        // Blocking read waits on events[2] to complete the chain.
        std::vector<cl_int> output_data(num_elements);
        error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
                                    data_size(), output_data.data(), 1,
                                    &events[2], nullptr);
        test_error(error, "clEnqueueReadBuffer failed");

        // Verify: (0 + 1) * pattern + 1 == pattern + 1 in every element.
        const cl_int ref = pattern + 1;
        for (size_t i = 0; i < num_elements; i++)
        {
            CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
        }

        return CL_SUCCESS;
    }

    clCommandQueueWrapper out_of_order_queue;
    clCommandBufferWrapper out_of_order_command_buffer;
    // 0: first cmd-buf, 1: mul_by_val kernel, 2: second cmd-buf.
    clEventWrapper events[3] = { nullptr, nullptr, nullptr };
};
// Pipelines command-buffer submissions on an out-of-order queue, using
// clEnqueueBarrier between submissions (instead of events) to serialize
// increment -> barrier -> mul_by_val -> barrier -> increment -> barrier,
// then a blocking read verifies every element equals pattern + 1.
struct BarrierPipelined : public CommandBufferPipelined
{
    BarrierPipelined(cl_device_id device, cl_context context,
                     cl_command_queue queue)
        : CommandBufferPipelined(device, context, queue),
          out_of_order_queue(nullptr), out_of_order_command_buffer(this)
    {}

    // Requires out-of-order queue support in addition to the base checks.
    bool Skip() override
    {
        return CommandBufferPipelined::Skip() || !out_of_order_support;
    }

    // Create the out-of-order queue and a command-buffer targeting it.
    cl_int SetUp(int elements) override
    {
        cl_int error = CommandBufferPipelined::SetUp(elements);
        // Bug fix: this message previously read "EventPipelined::SetUp
        // failed" (copy/paste from the sibling fixture), misattributing
        // any failure here to the wrong test.
        test_error(error, "CommandBufferPipelined::SetUp failed");

        out_of_order_queue = clCreateCommandQueue(
            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
        test_error(error, "Unable to create command queue to test with");

        out_of_order_command_buffer =
            clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
        test_error(error, "clCreateCommandBufferKHR failed");

        return CL_SUCCESS;
    }

    cl_int Run() override
    {
        cl_int error = RecordCommandBuffer(out_of_order_command_buffer);
        test_error(error, "RecordCommandBuffer failed");

        // First submission: increments the zero-filled buffer to all 1s.
        error = clEnqueueCommandBufferKHR(
            0, nullptr, out_of_order_command_buffer, 0, nullptr, nullptr);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        // Barrier orders the following commands after the submission above.
        error = clEnqueueBarrier(out_of_order_queue);
        test_error(error, "clEnqueueBarrier failed");

        error =
            clEnqueueNDRangeKernel(out_of_order_queue, mul_kernel, 1, nullptr,
                                   &num_elements, nullptr, 0, nullptr, nullptr);
        test_error(error, "clEnqueueNDRangeKernel failed");

        error = clEnqueueBarrier(out_of_order_queue);
        test_error(error, "clEnqueueBarrier failed");

        // Second submission of the same command-buffer.
        error = clEnqueueCommandBufferKHR(
            0, nullptr, out_of_order_command_buffer, 0, nullptr, nullptr);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        // Final barrier orders the read-back after the second submission.
        error = clEnqueueBarrier(out_of_order_queue);
        test_error(error, "clEnqueueBarrier failed");

        std::vector<cl_int> output_data(num_elements);
        error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
                                    data_size(), output_data.data(), 0, nullptr,
                                    nullptr);
        test_error(error, "clEnqueueReadBuffer failed");

        // Verify: (0 + 1) * pattern + 1 == pattern + 1 in every element.
        const cl_int ref = pattern + 1;
        for (size_t i = 0; i < num_elements; i++)
        {
            CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
        }

        return CL_SUCCESS;
    }

    clCommandQueueWrapper out_of_order_queue;
    clCommandBufferWrapper out_of_order_command_buffer;
};
} // anonymous namespace
// Entry point: serialization via implicit in-order queue dependencies.
REGISTER_TEST(pipeline_in_order_deps)
{
    return MakeAndRunTest<InOrderPipelined>(device, context, queue,
                                            num_elements);
}

// Entry point: serialization via event dependencies on an out-of-order queue.
REGISTER_TEST(pipeline_event_deps)
{
    return MakeAndRunTest<EventPipelined>(device, context, queue, num_elements);
}

// Entry point: serialization via barriers on an out-of-order queue.
REGISTER_TEST(pipeline_barrier_deps)
{
    return MakeAndRunTest<BarrierPipelined>(device, context, queue,
                                            num_elements);
}

View File

@@ -44,27 +44,18 @@
namespace {
////////////////////////////////////////////////////////////////////////////////
// printf tests for cl_khr_command_buffer which handles below cases:
// -test cases for device side printf
// -test cases for device side printf with a simultaneous use command-buffer
// Test for cl_khr_command_buffer which handles a command-buffer containing a
// printf kernel being repeatedly enqueued.
template <bool simul_use>
struct CommandBufferPrintfTest : public BasicCommandBufferTest
{
CommandBufferPrintfTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue),
trigger_event(nullptr), wait_event(nullptr), file_descriptor(0),
printf_use_support(false)
: BasicCommandBufferTest(device, context, queue), file_descriptor(0)
{
simultaneous_use_requested = simul_use;
if (simul_use)
{
buffer_size_multiplier = num_test_iters;
}
buffer_size_multiplier = num_test_iters;
}
//--------------------------------------------------------------------------
void ReleaseOutputStream(int fd)
{
fflush(stdout);
@@ -72,7 +63,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
close(fd);
}
//--------------------------------------------------------------------------
int AcquireOutputStream(int* error)
{
int fd = streamDup(fileno(stdout));
@@ -85,7 +75,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return fd;
}
//--------------------------------------------------------------------------
void GetAnalysisBuffer(std::stringstream& buffer)
{
std::ifstream fp(temp_filename, std::ios::in);
@@ -95,7 +84,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
}
}
//--------------------------------------------------------------------------
void PurgeTempFile()
{
std::ofstream ofs(temp_filename,
@@ -103,9 +91,10 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
ofs.close();
}
//--------------------------------------------------------------------------
bool Skip() override
{
if (BasicCommandBufferTest::Skip()) return true;
// Query if device supports kernel printf use
cl_device_command_buffer_capabilities_khr capabilities;
cl_int error =
@@ -114,16 +103,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
test_error(error,
"Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
printf_use_support =
const bool printf_use_support =
(capabilities & CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR)
!= 0;
if (!printf_use_support) return true;
return BasicCommandBufferTest::Skip()
|| (simultaneous_use_requested && !simultaneous_use_support);
return !printf_use_support;
}
//--------------------------------------------------------------------------
cl_int SetUpKernel() override
{
cl_int error = CL_SUCCESS;
@@ -153,14 +139,12 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
size_t data_size() const override
{
return sizeof(cl_char) * num_elements * buffer_size_multiplier
* max_pattern_length;
}
//--------------------------------------------------------------------------
cl_int SetUpKernelArgs() override
{
cl_int error = CL_SUCCESS;
@@ -192,7 +176,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override
{
auto pcFname = get_temp_filename();
@@ -209,39 +192,10 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return BasicCommandBufferTest::SetUp(elements);
}
//--------------------------------------------------------------------------
cl_int Run() override
{
cl_int error = CL_SUCCESS;
// record command buffer with primary queue
error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
if (simultaneous_use_support)
{
// enqueue simultaneous command-buffers with printf calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
}
else
{
// enqueue single command-buffer with printf calls
error = RunSingle();
test_error(error, "RunSingle failed");
}
std::remove(temp_filename.c_str());
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
{
cl_int error = CL_SUCCESS;
error = clCommandNDRangeKernelKHR(
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -251,7 +205,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
#define test_error_release_stdout(errCode, msg) \
{ \
auto errCodeResult = errCode; \
@@ -263,96 +216,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
} \
}
//--------------------------------------------------------------------------
cl_int EnqueueSinglePass(const std::vector<cl_char>& pattern,
std::vector<cl_char>& output_data)
{
cl_int error = CL_SUCCESS;
auto in_mem_size = sizeof(cl_char) * pattern.size();
error = clEnqueueWriteBuffer(queue, in_mem, CL_TRUE, 0, in_mem_size,
&pattern[0], 0, nullptr, nullptr);
test_error(error, "clEnqueueWriteBuffer failed");
test_assert_error(pattern.size() - 1 <= CL_UINT_MAX,
"pattern.size() - 1 does not fit in a cl_uint");
cl_uint offset[] = { 0, static_cast<cl_uint>(pattern.size() - 1) };
error = clEnqueueWriteBuffer(queue, off_mem, CL_TRUE, 0, sizeof(offset),
offset, 0, nullptr, nullptr);
test_error(error, "clEnqueueWriteBuffer failed");
// redirect output stream to temporary file
file_descriptor = AcquireOutputStream(&error);
if (error != 0)
{
log_error("Error while redirection stdout to file");
return TEST_FAIL;
}
// enqueue command buffer with kernel containing printf command
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, &wait_event);
test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed");
fflush(stdout);
// Wait until kernel finishes its execution and (thus) the output
// printed from the kernel is immediately printed
error = clWaitForEvents(1, &wait_event);
test_error(error, "clWaitForEvents failed");
// output buffer contains pattern to be compared with printout
error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error_release_stdout(error, "clEnqueueReadBuffer failed");
error = clFinish(queue);
test_error_release_stdout(error, "clFinish failed");
ReleaseOutputStream(file_descriptor);
// copy content of temporary file into string stream
std::stringstream sstr;
GetAnalysisBuffer(sstr);
if (sstr.str().size() != num_elements * offset[1])
{
log_error("GetAnalysisBuffer failed\n");
return TEST_FAIL;
}
// verify the result - compare printout and output buffer
for (size_t i = 0; i < num_elements * offset[1]; i++)
{
CHECK_VERIFICATION_ERROR(sstr.str().at(i), output_data[i], i);
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error = CL_SUCCESS;
std::vector<cl_char> output_data(num_elements * max_pattern_length);
for (unsigned i = 0; i < num_test_iters; i++)
{
unsigned pattern_length =
std::max(min_pattern_length, rand() % max_pattern_length);
char pattern_character = 'a' + rand() % 26;
std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
pattern[pattern_length] = '\0';
error = EnqueueSinglePass(pattern, output_data);
test_error(error, "EnqueueSinglePass failed");
output_data.assign(output_data.size(), 0);
PurgeTempFile();
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
struct EnqueuePassData
{
// null terminated character buffer
std::vector<cl_char> pattern;
@@ -361,8 +225,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
std::vector<cl_char> output_buffer;
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
cl_int EnqueuePass(EnqueuePassData& pd)
{
// write current pattern to device memory
auto in_mem_size = sizeof(cl_char) * pd.pattern.size();
@@ -377,15 +240,8 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
pd.offset, 0, nullptr, nullptr);
test_error_release_stdout(error, "clEnqueueWriteBuffer failed");
// create user event to block simultaneous command buffers
if (!trigger_event)
{
trigger_event = clCreateUserEvent(context, &error);
test_error_release_stdout(error, "clCreateUserEvent failed");
}
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&trigger_event, nullptr);
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed");
// output buffer contains pattern to be compared with printout
@@ -398,14 +254,14 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
cl_int Run() override
{
cl_int error = CL_SUCCESS;
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
cl_int offset = static_cast<cl_int>(num_elements * max_pattern_length);
std::vector<SimulPassData> simul_passes(num_test_iters);
std::vector<EnqueuePassData> enqueue_passes(num_test_iters);
const int pattern_chars_range = 26;
std::list<cl_char> pattern_chars;
@@ -413,7 +269,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
pattern_chars.push_back(cl_char('a' + i));
test_assert_error(pattern_chars.size() >= num_test_iters,
"Number of simultaneous launches must be lower than "
"Number of launches must be lower than "
"size of characters container");
cl_int total_pattern_coverage = 0;
@@ -428,11 +284,12 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
pattern.back() = '\0';
simul_passes[i] = { pattern,
{ cl_int(i * offset), cl_int(pattern_length) },
std::vector<cl_char>(num_elements
* pattern_length) };
total_pattern_coverage += simul_passes[i].output_buffer.size();
enqueue_passes[i] = {
pattern,
{ cl_int(i * offset), cl_int(pattern_length) },
std::vector<cl_char>(num_elements * pattern_length)
};
total_pattern_coverage += enqueue_passes[i].output_buffer.size();
pattern_chars.erase(it);
};
@@ -444,17 +301,14 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return TEST_FAIL;
}
// enqueue read/write and command buffer operations
for (auto&& pass : simul_passes)
// enqueue read/write and command buffer operations, serialized
// by in-order queue
for (auto&& pass : enqueue_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error_release_stdout(error, "EnqueueSimultaneousPass failed");
error = EnqueuePass(pass);
test_error_release_stdout(error, "EnqueuePass failed");
}
// execute command buffers
error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
test_error_release_stdout(error, "clSetUserEventStatus failed");
// flush streams
fflush(stdout);
@@ -477,13 +331,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
for (int i = 0; i < total_pattern_coverage; i++)
counters_map[sstr.str().at(i)]++;
if (counters_map.size() != simul_passes.size())
if (counters_map.size() != enqueue_passes.size())
{
log_error("printout inconsistent with input data\n");
return TEST_FAIL;
}
for (auto&& pass : simul_passes)
for (auto&& pass : enqueue_passes)
{
auto& res_data = pass.output_buffer;
@@ -501,18 +355,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
}
}
std::remove(temp_filename.c_str());
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
clEventWrapper trigger_event = nullptr;
clEventWrapper wait_event = nullptr;
std::string temp_filename;
int file_descriptor;
bool printf_use_support;
// specifies max test length for printf pattern
const unsigned max_pattern_length = 6;
// specifies min test length for printf pattern
@@ -523,14 +372,8 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
} // anonymous namespace
REGISTER_TEST(basic_printf)
REGISTER_TEST(printf)
{
return MakeAndRunTest<CommandBufferPrintfTest<false>>(device, context,
queue, num_elements);
}
REGISTER_TEST(simultaneous_printf)
{
return MakeAndRunTest<CommandBufferPrintfTest<true>>(device, context, queue,
num_elements);
return MakeAndRunTest<CommandBufferPrintfTest>(device, context, queue,
num_elements);
}

View File

@@ -86,21 +86,17 @@ cl_int VerifyResult(const clEventWrapper& event)
}
////////////////////////////////////////////////////////////////////////////////
// Command-buffer profiling test cases:
// -all commands are recorded to a single command-queue
// -profiling a command-buffer with simultaneous use
template <bool simultaneous_request>
// Command-buffer profiling test for enqueuing command-buffer twice and checking
// the profiling counters on the events returned.
struct CommandBufferProfiling : public BasicCommandBufferTest
{
CommandBufferProfiling(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), wait_event(nullptr)
: BasicCommandBufferTest(device, context, queue)
{
simultaneous_use_requested = simultaneous_request;
if (simultaneous_request) buffer_size_multiplier = 2;
buffer_size_multiplier = 2; // Do two enqueues of command-buffer
}
//--------------------------------------------------------------------------
bool Skip() override
{
if (BasicCommandBufferTest::Skip()) return true;
@@ -127,10 +123,9 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
"Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
return true;
}
return (simultaneous_use_requested && !simultaneous_use_support);
return false;
}
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override
{
@@ -156,37 +151,45 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
return BasicCommandBufferTest::SetUp(elements);
}
//--------------------------------------------------------------------------
struct EnqueuePassData
{
cl_int offset;
clEventWrapper query_event;
};
cl_int Run() override
{
cl_int error = CL_SUCCESS;
// record command buffer
error = RecordCommandBuffer();
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
if (simultaneous_use_requested)
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<EnqueuePassData> enqueue_passes = {
{ 0, clEventWrapper() }, { offset, clEventWrapper() }
};
// In-order queue serialized the command-buffer submissions
for (auto&& pass : enqueue_passes)
{
// enqueue simultaneous command-buffers with profiling command queue
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
error = EnqueuePass(pass);
test_error(error, "EnqueueSerializedPass failed");
}
else
error = clFinish(queue);
test_error(error, "clFinish failed");
for (auto&& pass : enqueue_passes)
{
// enqueue single command-buffer with profiling command queue
error = RunSingle();
test_error(error, "RunSingle failed");
error = VerifyResult(pass.query_event);
test_error(error, "VerifyResult failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
{
cl_int error = CL_SUCCESS;
error = clCommandNDRangeKernelKHR(
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -196,41 +199,7 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error = CL_SUCCESS;
std::vector<cl_int> output_data(num_elements);
error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
clEventWrapper query_event;
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, &query_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
error = VerifyResult(query_event);
test_error(error, "VerifyResult failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
{
cl_int offset;
std::vector<cl_int> output_buffer;
clEventWrapper query_event;
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
cl_int EnqueuePass(EnqueuePassData& pd)
{
cl_int error = clEnqueueFillBuffer(
queue, out_mem, &pattern, sizeof(cl_int),
@@ -241,59 +210,13 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
0, sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
if (!wait_event)
{
wait_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&wait_event, &pd.query_event);
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, &pd.query_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
{
cl_int error = CL_SUCCESS;
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ 0, std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueueSimultaneousPass failed");
}
error = clSetUserEventStatus(wait_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
for (auto&& pass : simul_passes)
{
error = VerifyResult(pass.query_event);
test_error(error, "VerifyResult failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
clEventWrapper wait_event;
const cl_int pattern = 0xA;
};
@@ -356,19 +279,13 @@ struct CommandBufferSubstituteQueueProfiling : public BasicCommandBufferTest
};
} // anonymous namespace
REGISTER_TEST(basic_profiling)
REGISTER_TEST(profiling)
{
return MakeAndRunTest<CommandBufferProfiling<false>>(device, context, queue,
num_elements);
return MakeAndRunTest<CommandBufferProfiling>(device, context, queue,
num_elements);
}
REGISTER_TEST(simultaneous_profiling)
{
return MakeAndRunTest<CommandBufferProfiling<true>>(device, context, queue,
num_elements);
}
REGISTER_TEST(substitute_queue_profiling)
REGISTER_TEST(profiling_substitute_queue)
{
return MakeAndRunTest<CommandBufferSubstituteQueueProfiling>(
device, context, queue, num_elements);

View File

@@ -23,21 +23,16 @@ namespace {
// Command-queue substitution tests which handles below cases:
// -substitution on queue without properties
// -substitution on queue with properties
// -simultaneous use queue substitution
template <bool prop_use, bool simul_use>
template <bool prop_use>
struct SubstituteQueueTest : public BasicCommandBufferTest
{
SubstituteQueueTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue),
properties_use_requested(prop_use), user_event(nullptr)
{
simultaneous_use_requested = simul_use;
if (simul_use) buffer_size_multiplier = 2;
}
properties_use_requested(prop_use)
{}
//--------------------------------------------------------------------------
bool Skip() override
{
if (properties_use_requested)
@@ -57,11 +52,9 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return true;
}
return BasicCommandBufferTest::Skip()
|| (simultaneous_use_requested && !simultaneous_use_support);
return BasicCommandBufferTest::Skip();
}
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override
{
// By default command queue is created without properties,
@@ -81,7 +74,6 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return BasicCommandBufferTest::SetUp(elements);
}
//--------------------------------------------------------------------------
cl_int Run() override
{
// record command buffer with primary queue
@@ -106,23 +98,14 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
test_error(error, "clCreateCommandQueue failed");
}
if (simultaneous_use_support)
{
// enque simultaneous command-buffers with substitute queue
error = RunSimultaneous(new_queue);
test_error(error, "RunSimultaneous failed");
}
else
{
// enque single command-buffer with substitute queue
error = RunSingle(new_queue);
test_error(error, "RunSingle failed");
}
// enqueue single command-buffer with substitute queue
error = RunSingle(new_queue);
test_error(error, "RunSingle failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
{
cl_int error = clCommandNDRangeKernelKHR(
@@ -135,14 +118,13 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle(const cl_command_queue& q)
{
cl_int error = CL_SUCCESS;
std::vector<cl_int> output_data(num_elements);
error = clEnqueueFillBuffer(q, in_mem, &pattern_pri, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
cl_int error =
clEnqueueFillBuffer(q, in_mem, &pattern_pri, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
cl_command_queue queues[] = { q };
@@ -165,90 +147,8 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
{
cl_int pattern;
cl_int offset;
cl_command_queue queue;
std::vector<cl_int> output_buffer;
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{
cl_int error = clEnqueueFillBuffer(
pd.queue, in_mem, &pd.pattern, sizeof(cl_int),
pd.offset * sizeof(cl_int), data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error =
clEnqueueFillBuffer(pd.queue, off_mem, &pd.offset, sizeof(cl_int),
0, sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
if (!user_event)
{
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
cl_command_queue queues[] = { pd.queue };
error = clEnqueueCommandBufferKHR(1, queues, command_buffer, 1,
&user_event, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
pd.queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int),
data_size(), pd.output_buffer.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous(const cl_command_queue& q)
{
cl_int error = CL_SUCCESS;
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ pattern_pri, 0, q, std::vector<cl_int>(num_elements) },
{ pattern_sec, offset, q, std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueuePass failed");
}
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
for (auto&& pass : simul_passes)
{
error = clFinish(pass.queue);
test_error(error, "clFinish failed");
auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pass.pattern, res_data[i], i);
}
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
const cl_int pattern_pri = 0xB;
const cl_int pattern_sec = 0xC;
bool properties_use_requested;
clEventWrapper user_event;
};
// Command-queue substitution tests which handles below cases:
@@ -397,20 +297,14 @@ struct QueueOrderTest : public BasicCommandBufferTest
REGISTER_TEST(queue_substitution)
{
return MakeAndRunTest<SubstituteQueueTest<false, false>>(
device, context, queue, num_elements);
return MakeAndRunTest<SubstituteQueueTest<false>>(device, context, queue,
num_elements);
}
REGISTER_TEST(properties_queue_substitution)
REGISTER_TEST(queue_substitution_properties)
{
return MakeAndRunTest<SubstituteQueueTest<true, false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(simultaneous_queue_substitution)
{
return MakeAndRunTest<SubstituteQueueTest<false, true>>(
device, context, queue, num_elements);
return MakeAndRunTest<SubstituteQueueTest<true>>(device, context, queue,
num_elements);
}
REGISTER_TEST(queue_substitute_in_order)

View File

@@ -22,25 +22,22 @@ namespace {
////////////////////////////////////////////////////////////////////////////////
// clSetKernelArg tests for cl_khr_command_buffer which handles below cases:
// -test interactions of clSetKernelArg with command-buffers
// -test interactions of clSetKernelArg on a command-buffer pending execution
// -test interactions of clSetKernelArg after command-buffer finalize but
// before enqueue
// -test interactions of clSetKernelArg between command-buffer enqueue
template <bool simul_use>
template <bool enqueue_test>
struct CommandBufferSetKernelArg : public BasicCommandBufferTest
{
CommandBufferSetKernelArg(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), trigger_event(nullptr)
: BasicCommandBufferTest(device, context, queue)
{
simultaneous_use_requested = simul_use;
if (simul_use) buffer_size_multiplier = 2;
if (enqueue_test) buffer_size_multiplier = 2;
}
//--------------------------------------------------------------------------
cl_int SetUpKernel() override
{
cl_int error = CL_SUCCESS;
const char* kernel_str =
R"(
__kernel void copy(int in, __global int* out, __global int* offset)
@@ -50,8 +47,8 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
out[ind] = in;
})";
error = create_single_kernel_helper_create_program(context, &program, 1,
&kernel_str);
cl_int error = create_single_kernel_helper_create_program(
context, &program, 1, &kernel_str);
test_error(error, "Failed to create program with source");
error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
@@ -63,7 +60,6 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int SetUpKernelArgs() override
{
cl_int error = CL_SUCCESS;
@@ -99,15 +95,14 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int Run() override
{
cl_int error = CL_SUCCESS;
if (simultaneous_use_requested)
if (enqueue_test)
{
// enqueue simultaneous command-buffers with clSetKernelArg calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
// enqueue command-buffers with clSetKernelArg calls in between
error = RunMultipleEnqueue();
test_error(error, "RunMultipleEnqueue failed");
}
else
{
@@ -119,12 +114,9 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
{
cl_int error = CL_SUCCESS;
error = clCommandNDRangeKernelKHR(
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -148,14 +140,12 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error = CL_SUCCESS;
std::vector<cl_int> output_data(num_elements);
// record command buffer
error = RecordCommandBuffer();
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
const cl_int pattern_base = 0;
@@ -187,20 +177,16 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
struct EnqueuePassData
{
cl_int pattern;
cl_int offset;
std::vector<cl_int> output_buffer;
};
//--------------------------------------------------------------------------
cl_int RecordSimultaneousCommandBuffer() const
cl_int RecordEnqueueCommandBuffer() const
{
cl_int error = CL_SUCCESS;
error = clCommandNDRangeKernelKHR(
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -210,8 +196,7 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
cl_int EnqueuePass(EnqueuePassData& pd)
{
cl_int error = clEnqueueFillBuffer(
queue, out_mem, &pd.pattern, sizeof(cl_int),
@@ -222,14 +207,8 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
0, sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
if (!trigger_event)
{
trigger_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&trigger_event, nullptr);
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
@@ -240,49 +219,39 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
cl_int RunMultipleEnqueue()
{
cl_int error = CL_SUCCESS;
// record command buffer with primary queue
error = RecordSimultaneousCommandBuffer();
test_error(error, "RecordSimultaneousCommandBuffer failed");
cl_int error = RecordEnqueueCommandBuffer();
test_error(error, "RecordEnqueeuCommandBuffer failed");
std::vector<SimulPassData> simul_passes = {
{ 0, 0, std::vector<cl_int>(num_elements) }
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<EnqueuePassData> enqueue_passes = {
{ 0, 0, std::vector<cl_int>(num_elements) },
{ 1, offset, std::vector<cl_int>(num_elements) }
};
error = EnqueueSimultaneousPass(simul_passes.front());
test_error(error, "EnqueueSimultaneousPass 1 failed");
// changing kernel args at this point should have no effect,
// test will verify if clSetKernelArg didn't affect command-buffer
cl_int in_arg = pattern_sec;
error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 1, sizeof(out_mem_k2), &out_mem_k2);
test_error(error, "clSetKernelArg failed");
if (simultaneous_use_support)
for (auto&& pass : enqueue_passes)
{
cl_int offset = static_cast<cl_int>(num_elements);
simul_passes.push_back(
{ 1, offset, std::vector<cl_int>(num_elements) });
// changing kernel args at this point should have no effect,
// test will verify if clSetKernelArg didn't affect command-buffer
cl_int in_arg = pattern_sec;
error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
test_error(error, "clSetKernelArg failed");
error = EnqueueSimultaneousPass(simul_passes.back());
test_error(error, "EnqueueSimultaneousPass 2 failed");
error = clSetKernelArg(kernel, 1, sizeof(out_mem_k2), &out_mem_k2);
test_error(error, "clSetKernelArg failed");
error = EnqueuePass(pass);
test_error(error, "EnqueuePass failed");
}
error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
// verify the result buffer
for (auto&& pass : simul_passes)
for (auto&& pass : enqueue_passes)
{
auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++)
@@ -294,9 +263,6 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
clEventWrapper trigger_event = nullptr;
const cl_int pattern_pri = 2;
const cl_int pattern_sec = 3;
@@ -305,13 +271,13 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
} // anonymous namespace
REGISTER_TEST(basic_set_kernel_arg)
REGISTER_TEST(set_kernel_arg_after_finalize)
{
return MakeAndRunTest<CommandBufferSetKernelArg<false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(pending_set_kernel_arg)
REGISTER_TEST(set_kernel_arg_after_enqueue)
{
return MakeAndRunTest<CommandBufferSetKernelArg<true>>(device, context,
queue, num_elements);

View File

@@ -131,9 +131,10 @@ struct CreateCommandBufferRepeatedProperties : public BasicCommandBufferTest
if (BasicCommandBufferTest::Skip()) return true;
bool skip = true;
if (simultaneous_use_support)
if (is_extension_available(
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
{
rep_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
rep_prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
skip = false;
}
else if (is_extension_available(
@@ -142,13 +143,6 @@ struct CreateCommandBufferRepeatedProperties : public BasicCommandBufferTest
rep_prop = CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
skip = false;
}
else if (is_extension_available(
device,
CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
{
rep_prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
skip = false;
}
return skip;
}
@@ -185,7 +179,9 @@ struct CreateCommandBufferNotSupportedProperties : public BasicCommandBufferTest
if (BasicCommandBufferTest::Skip()) return true;
bool skip = true;
if (!simultaneous_use_support)
if (is_extension_available(
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME)
&& !simultaneous_use_support)
{
unsupported_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
skip = false;

View File

@@ -66,102 +66,6 @@ struct EnqueueCommandBufferNotFinalized : public BasicCommandBufferTest
}
};
// CL_INVALID_OPERATION if command_buffer was not created with the
// CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR flag and is in the Pending state.
struct EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState
: public BasicCommandBufferTest
{
EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState(
cl_device_id device, cl_context context, cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), user_event(nullptr)
{}
cl_int Run() override
{
cl_int error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_failure_error_ret(error, CL_INVALID_OPERATION,
"clEnqueueCommandBufferKHR should return "
"CL_INVALID_OPERATION",
TEST_FAIL);
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
clFinish(queue);
return CL_SUCCESS;
}
cl_int SetUp(int elements) override
{
auto verify_state = [&](const cl_command_buffer_state_khr &expected) {
cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0);
cl_int error = clGetCommandBufferInfoKHR(
command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state),
&state, nullptr);
test_error_ret(error, "clGetCommandBufferInfoKHR failed",
TEST_FAIL);
test_assert_error(
state == expected,
"Unexpected result of CL_COMMAND_BUFFER_STATE_KHR query!");
return TEST_PASS;
};
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp failed");
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
test_error(error, "clCreateCommandBufferKHR failed");
error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
test_error(error, "State is not Executable");
error = EnqueueCommandBuffer();
test_error(error, "EnqueueCommandBuffer failed");
return CL_SUCCESS;
}
cl_int RecordCommandBuffer()
{
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
cl_int EnqueueCommandBuffer()
{
cl_int pattern = 0xE;
cl_int error =
clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&user_event, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
return CL_SUCCESS;
}
clEventWrapper user_event;
};
// CL_INVALID_VALUE if queues is NULL and num_queues is > 0, or queues is not
// NULL and num_queues is 0.
struct EnqueueCommandBufferNullQueuesNumQueues : public BasicCommandBufferTest
@@ -623,14 +527,6 @@ REGISTER_TEST(negative_enqueue_command_buffer_not_finalized)
device, context, queue, num_elements);
}
REGISTER_TEST(
negative_enqueue_command_buffer_without_simultaneous_no_pending_state)
{
return MakeAndRunTest<
EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState>(
device, context, queue, num_elements);
}
REGISTER_TEST(negative_enqueue_command_buffer_null_queues_num_queues)
{
return MakeAndRunTest<EnqueueCommandBufferNullQueuesNumQueues>(

View File

@@ -44,20 +44,9 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
FinalizeCommandBufferNotRecordingState(cl_device_id device,
cl_context context,
cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), user_event(nullptr)
: BasicCommandBufferTest(device, context, queue)
{}
cl_int SetUp(int elements) override
{
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp failed");
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
return CL_SUCCESS;
}
cl_int Run() override
{
auto verify_state = [&](const cl_command_buffer_state_khr &expected) {
@@ -87,18 +76,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
"CL_INVALID_OPERATION",
TEST_FAIL);
error = EnqueueCommandBuffer();
test_error(error, "EnqueueCommandBuffer failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_failure_error_ret(error, CL_INVALID_OPERATION,
"clFinalizeCommandBufferKHR should return "
"CL_INVALID_OPERATION",
TEST_FAIL);
clSetUserEventStatus(user_event, CL_COMPLETE);
clFinish(queue);
return CL_SUCCESS;
}
@@ -114,22 +91,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
return CL_SUCCESS;
}
cl_int EnqueueCommandBuffer()
{
cl_int pattern = 0xE;
cl_int error =
clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&user_event, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
return CL_SUCCESS;
}
clEventWrapper user_event;
};
};

View File

@@ -398,16 +398,17 @@ struct CommandNDRangeKernelWithKernelEnqueueCall : public BasicCommandBufferTest
const char* kernel_str =
R"(
__kernel void enqueue_call_func() {
}
__kernel void enqueue_call_func(__global int* out_mem) {
out_mem[get_global_id(0)] = 0x1234;
}
__kernel void enqueue_call_kernel() {
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
^{enqueue_call_func();});
}
)";
__kernel void enqueue_call_kernel(__global int* out_mem) {
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
^{enqueue_call_func(out_mem);});
}
)";
std::string build_options = std::string(" ") + cl_std;
error = create_single_kernel_helper(context, &program, &kernel, 1,
@@ -443,7 +444,17 @@ enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
cl_int Run() override
{
cl_int error = clCommandNDRangeKernelKHR(
cl_int error = CL_SUCCESS;
clMemWrapper out_mem =
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
num_elements * sizeof(cl_int), nullptr, &error);
test_error(error, "clCreateBuffer failed");
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &out_mem);
test_error(error, "clSetKernelArg failed");
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);

View File

@@ -15,6 +15,7 @@
//
#include "debug_ahb.h"
#include "harness/errorHelpers.h"
constexpr AHardwareBuffer_UsageFlags flag_list[] = {
AHARDWAREBUFFER_USAGE_CPU_READ_RARELY,
@@ -191,3 +192,27 @@ std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format)
}
return result;
}
AHardwareBuffer *create_AHB(AHardwareBuffer_Desc *desc)
{
AHardwareBuffer *buffer_ptr = nullptr;
int err = AHardwareBuffer_allocate(desc, &buffer_ptr);
if (err != 0)
{
throw std::runtime_error("AHardwareBuffer_allocate failed with code: "
+ std::to_string(err) + "\n");
}
return buffer_ptr;
}
void log_unsupported_ahb_format(AHardwareBuffer_Desc aHardwareBufferDesc)
{
std::string usage_string = ahardwareBufferDecodeUsageFlagsToString(
static_cast<AHardwareBuffer_UsageFlags>(aHardwareBufferDesc.usage));
log_info("Unsupported format %s:\n Usage flags %s\n Size (%u, %u, "
"layers = %u)\n",
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
aHardwareBufferDesc.format))
.c_str(),
usage_string.c_str(), aHardwareBufferDesc.width,
aHardwareBufferDesc.height, aHardwareBufferDesc.layers);
}

View File

@@ -19,6 +19,7 @@
#include <string>
#include <vector>
#include <numeric>
#include <CL/cl.h>
#define CHECK_AHARDWARE_BUFFER_SUPPORT(ahardwareBuffer_Desc, format) \
if (!AHardwareBuffer_isSupported(&ahardwareBuffer_Desc)) \
@@ -40,3 +41,90 @@ std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format);
std::string ahardwareBufferUsageFlagToString(AHardwareBuffer_UsageFlags flag);
std::string
ahardwareBufferDecodeUsageFlagsToString(AHardwareBuffer_UsageFlags flags);
AHardwareBuffer* create_AHB(AHardwareBuffer_Desc* desc);
void log_unsupported_ahb_format(AHardwareBuffer_Desc desc);
struct AHardwareBufferWrapper
{
AHardwareBuffer* m_ahb;
AHardwareBufferWrapper(): m_ahb(nullptr) {}
AHardwareBufferWrapper(AHardwareBuffer* ahb) { m_ahb = ahb; }
AHardwareBufferWrapper(AHardwareBuffer_Desc* desc)
{
m_ahb = create_AHB(desc);
}
AHardwareBufferWrapper& operator=(AHardwareBuffer* rhs)
{
release();
m_ahb = rhs;
return *this;
}
~AHardwareBufferWrapper() { release(); }
// Copy constructor
AHardwareBufferWrapper(AHardwareBufferWrapper const& ahbw)
: m_ahb(ahbw.m_ahb)
{
retain();
}
// Copy assignment operator
AHardwareBufferWrapper& operator=(AHardwareBufferWrapper const& rhs)
{
release();
m_ahb = rhs.m_ahb;
retain();
return *this;
}
// Move constructor
AHardwareBufferWrapper(AHardwareBufferWrapper&& ahbw)
{
m_ahb = ahbw.m_ahb;
ahbw.m_ahb = nullptr;
}
// Move assignment operator
AHardwareBufferWrapper& operator=(AHardwareBufferWrapper&& rhs)
{
if (this != &rhs)
{
release(); // Giving up current reference
m_ahb = rhs.m_ahb;
rhs.m_ahb = nullptr;
}
return *this;
}
void retain()
{
if (nullptr != m_ahb)
{
AHardwareBuffer_acquire(m_ahb);
}
}
void release()
{
if (nullptr != m_ahb)
{
AHardwareBuffer_release(m_ahb);
}
}
// Usage operators
operator AHardwareBuffer*() { return m_ahb; }
cl_mem_properties get_props()
{
return reinterpret_cast<cl_mem_properties>(m_ahb);
}
};

View File

@@ -97,7 +97,7 @@ static const char *diff_images_kernel_source = {
};
// Checks that the inferred image format is correct
REGISTER_TEST(test_images)
REGISTER_TEST(images)
{
cl_int err = CL_SUCCESS;
@@ -134,19 +134,15 @@ REGISTER_TEST(test_images)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
log_info(
"Testing %s\n",
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
.c_str());
const cl_mem_properties props[] = {
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
aHardwareBuffer.get_props(), 0
};
cl_mem image = clCreateImageWithProperties(
@@ -181,8 +177,6 @@ REGISTER_TEST(test_images)
test_error(clReleaseMemObject(image),
"Failed to release image");
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
}
}
}
@@ -190,7 +184,7 @@ REGISTER_TEST(test_images)
return TEST_PASS;
}
REGISTER_TEST(test_images_read)
REGISTER_TEST(images_read)
{
cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed);
@@ -238,15 +232,11 @@ REGISTER_TEST(test_images_read)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
log_info(
"Testing %s\n",
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
.c_str());
// Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -279,7 +269,7 @@ REGISTER_TEST(test_images_read)
generate_random_image_data(&imageInfo, srcData, seed);
void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock(
int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
nullptr, &hardware_buffer_data);
if (ahb_result != 0)
@@ -301,7 +291,7 @@ REGISTER_TEST(test_images_read)
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
aHardwareBuffer.get_props(), 0
};
clMemWrapper imported_image = clCreateImageWithProperties(
@@ -394,7 +384,7 @@ REGISTER_TEST(test_images_read)
test_error(err, "clEnqueueNDRangeKernel failed");
err = clEnqueueReleaseExternalMemObjectsKHR(
queue, 1, &opencl_image, 0, nullptr, nullptr);
queue, 1, &imported_image, 0, nullptr, nullptr);
test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed");
// Read buffer and verify
@@ -482,9 +472,6 @@ REGISTER_TEST(test_images_read)
}
}
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
}
}
}
@@ -492,7 +479,7 @@ REGISTER_TEST(test_images_read)
return TEST_PASS;
}
REGISTER_TEST(test_enqueue_read_image)
REGISTER_TEST(enqueue_read_image)
{
cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed);
@@ -540,15 +527,12 @@ REGISTER_TEST(test_enqueue_read_image)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
log_info(
"Testing %s\n",
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
.c_str());
// Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -581,7 +565,7 @@ REGISTER_TEST(test_enqueue_read_image)
generate_random_image_data(&imageInfo, srcData, seed);
void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock(
int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
nullptr, &hardware_buffer_data);
if (ahb_result != 0)
@@ -601,9 +585,9 @@ REGISTER_TEST(test_enqueue_read_image)
return TEST_FAIL;
}
const cl_mem_properties props[] = {
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
aHardwareBuffer.get_props(), 0
};
clMemWrapper imported_image = clCreateImageWithProperties(
@@ -662,9 +646,6 @@ REGISTER_TEST(test_enqueue_read_image)
out_image_ptr += imageInfo.rowPitch;
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
if (total_matched == 0)
{
test_fail("Zero bytes matched");
@@ -676,7 +657,7 @@ REGISTER_TEST(test_enqueue_read_image)
return TEST_PASS;
}
REGISTER_TEST(test_enqueue_copy_image)
REGISTER_TEST(enqueue_copy_image)
{
cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed);
@@ -724,15 +705,12 @@ REGISTER_TEST(test_enqueue_copy_image)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
log_info(
"Testing %s\n",
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
.c_str());
// Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -765,7 +743,7 @@ REGISTER_TEST(test_enqueue_copy_image)
generate_random_image_data(&imageInfo, srcData, seed);
void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock(
int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
nullptr, &hardware_buffer_data);
if (ahb_result != 0)
@@ -787,7 +765,7 @@ REGISTER_TEST(test_enqueue_copy_image)
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
aHardwareBuffer.get_props(), 0
};
clMemWrapper imported_image = clCreateImageWithProperties(
@@ -975,9 +953,6 @@ REGISTER_TEST(test_enqueue_copy_image)
}
}
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
}
}
}
@@ -985,7 +960,7 @@ REGISTER_TEST(test_enqueue_copy_image)
return TEST_PASS;
}
REGISTER_TEST(test_enqueue_copy_image_to_buffer)
REGISTER_TEST(enqueue_copy_image_to_buffer)
{
cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed);
@@ -1033,15 +1008,12 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
log_info(
"Testing %s\n",
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
.c_str());
// Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -1074,7 +1046,7 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
generate_random_image_data(&imageInfo, srcData, seed);
void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock(
int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
nullptr, &hardware_buffer_data);
if (ahb_result != 0)
@@ -1096,7 +1068,7 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
aHardwareBuffer.get_props(), 0
};
clMemWrapper imported_image = clCreateImageWithProperties(
@@ -1165,9 +1137,6 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
out_buffer_ptr += scanlineSize;
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
if (total_matched == 0)
{
test_fail("Zero bytes matched");
@@ -1179,7 +1148,7 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
return TEST_PASS;
}
REGISTER_TEST(test_enqueue_copy_buffer_to_image)
REGISTER_TEST(enqueue_copy_buffer_to_image)
{
cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed);
@@ -1227,15 +1196,12 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
log_info(
"Testing %s\n",
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
.c_str());
// Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -1275,7 +1241,7 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
aHardwareBuffer.get_props(), 0
};
clMemWrapper imported_image = clCreateImageWithProperties(
@@ -1307,7 +1273,7 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
&hardware_buffer_desc);
void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock(
int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1,
nullptr, &hardware_buffer_data);
if (ahb_result != 0)
@@ -1366,9 +1332,6 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
return TEST_FAIL;
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
if (total_matched == 0)
{
test_fail("Zero bytes matched");
@@ -1380,7 +1343,7 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
return TEST_PASS;
}
REGISTER_TEST(test_enqueue_write_image)
REGISTER_TEST(enqueue_write_image)
{
cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed);
@@ -1428,15 +1391,12 @@ REGISTER_TEST(test_enqueue_write_image)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
log_info(
"Testing %s\n",
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
.c_str());
// Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -1453,7 +1413,7 @@ REGISTER_TEST(test_enqueue_write_image)
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
aHardwareBuffer.get_props(), 0
};
clMemWrapper imported_image = clCreateImageWithProperties(
@@ -1503,7 +1463,7 @@ REGISTER_TEST(test_enqueue_write_image)
&hardware_buffer_desc);
void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock(
int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1,
nullptr, &hardware_buffer_data);
if (ahb_result != 0)
@@ -1564,9 +1524,6 @@ REGISTER_TEST(test_enqueue_write_image)
return TEST_FAIL;
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
if (total_matched == 0)
{
test_fail("Zero bytes matched");
@@ -1578,7 +1535,7 @@ REGISTER_TEST(test_enqueue_write_image)
return TEST_PASS;
}
REGISTER_TEST(test_enqueue_fill_image)
REGISTER_TEST(enqueue_fill_image)
{
cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed);
@@ -1626,15 +1583,12 @@ REGISTER_TEST(test_enqueue_fill_image)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
log_info(
"Testing %s\n",
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
.c_str());
// Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -1650,7 +1604,7 @@ REGISTER_TEST(test_enqueue_fill_image)
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
aHardwareBuffer.get_props(), 0
};
clMemWrapper imported_image = clCreateImageWithProperties(
@@ -1739,7 +1693,7 @@ REGISTER_TEST(test_enqueue_fill_image)
&hardware_buffer_desc);
void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock(
int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1,
nullptr, &hardware_buffer_data);
if (ahb_result != 0)
@@ -1819,8 +1773,6 @@ REGISTER_TEST(test_enqueue_fill_image)
return TEST_FAIL;
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
free(verificationLine);
if (total_matched == 0)
@@ -1834,7 +1786,7 @@ REGISTER_TEST(test_enqueue_fill_image)
return TEST_PASS;
}
REGISTER_TEST(test_blob)
REGISTER_TEST(blob)
{
cl_int err = CL_SUCCESS;
@@ -1883,19 +1835,17 @@ REGISTER_TEST(test_blob)
continue;
}
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result =
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
log_info(
"Testing %s\n",
ahardwareBufferFormatToString(
static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
.c_str());
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
aHardwareBuffer.get_props(), 0
};
cl_mem buffer = clCreateBufferWithProperties(
@@ -1903,8 +1853,6 @@ REGISTER_TEST(test_blob)
test_error(err, "Failed to create CL buffer from AHardwareBuffer");
test_error(clReleaseMemObject(buffer), "Failed to release buffer");
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
}
return TEST_PASS;

View File

@@ -21,7 +21,7 @@
#include <android/hardware_buffer.h>
#include "debug_ahb.h"
REGISTER_TEST(test_buffer_format_negative)
REGISTER_TEST(buffer_format_negative)
{
cl_int err = CL_SUCCESS;
@@ -98,7 +98,7 @@ REGISTER_TEST(test_buffer_format_negative)
return TEST_PASS;
}
REGISTER_TEST(test_buffer_size_negative)
REGISTER_TEST(buffer_size_negative)
{
cl_int err = CL_SUCCESS;
constexpr size_t buffer_size = 64;
@@ -175,7 +175,7 @@ REGISTER_TEST(test_buffer_size_negative)
return TEST_PASS;
}
REGISTER_TEST(test_images_negative)
REGISTER_TEST(images_negative)
{
cl_int err = CL_SUCCESS;
@@ -244,3 +244,155 @@ REGISTER_TEST(test_images_negative)
return TEST_PASS;
}
// Negative tests for cl_khr_external_memory_android_hardware_buffer:
// verifies that clCreateBufferWithProperties / clCreateImageWithProperties
// return the error codes mandated by the extension for invalid argument
// combinations (oversized buffer, wrong AHB format, non-NULL host_ptr,
// non-NULL image format / descriptor).
// Fix: the image-path failure messages previously blamed
// clCreateBufferWithProperties although clCreateImageWithProperties is the
// API under test.
REGISTER_TEST(invalid_arguments)
{
    cl_int err;
    constexpr cl_uint buffer_size = 4096;

    if (!is_extension_available(
            device, "cl_khr_external_memory_android_hardware_buffer"))
    {
        log_info("cl_khr_external_memory_android_hardware_buffer is not "
                 "supported on this platform. Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }

    // BLOB-format AHardwareBuffer: the only format importable as a CL buffer.
    AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
    aHardwareBufferDesc.width = buffer_size;
    aHardwareBufferDesc.height = 1;
    aHardwareBufferDesc.layers = 1;
    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
    aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
        | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
    if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
    {
        log_unsupported_ahb_format(aHardwareBufferDesc);
        return TEST_SKIPPED_ITSELF;
    }
    AHardwareBufferWrapper ahb_buffer(&aHardwareBufferDesc);

    // RGBA AHardwareBuffer: importable as a CL image, not as a buffer.
    aHardwareBufferDesc.width = 64;
    aHardwareBufferDesc.height = 64;
    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
    if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
    {
        log_unsupported_ahb_format(aHardwareBufferDesc);
        return TEST_SKIPPED_ITSELF;
    }
    AHardwareBufferWrapper ahb_image(&aHardwareBufferDesc);

    const cl_mem_properties props_buffer[] = {
        CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
        ahb_buffer.get_props(),
        0,
    };
    const cl_mem_properties props_image[] = {
        CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
        ahb_image.get_props(),
        0,
    };

    // stub values
    cl_image_format image_format = { 0 };
    cl_image_desc image_desc = { 0 };
    int host_data = 0;
    void *host_ptr = reinterpret_cast<void *>(&host_data);

    log_info("Testing buffer error conditions\n");
    // Buffer error conditions
    clMemWrapper mem =
        clCreateBufferWithProperties(context, props_buffer, CL_MEM_READ_WRITE,
                                     buffer_size + 1, nullptr, &err);
    if (CL_INVALID_BUFFER_SIZE != err)
    {
        log_error(
            "clCreateBufferWithProperties should return CL_INVALID_BUFFER_SIZE "
            "but returned %s: CL_INVALID_BUFFER_SIZE if size is non-zero and "
            "greater than the AHardwareBuffer when importing external memory "
            "using cl_khr_external_memory_android_hardware_buffer\n",
            IGetErrorString(err));
        return TEST_FAIL;
    }

    mem = clCreateBufferWithProperties(context, props_image, CL_MEM_READ_WRITE,
                                       0, nullptr, &err);
    if (CL_INVALID_OPERATION != err)
    {
        log_error(
            "clCreateBufferWithProperties should return CL_INVALID_OPERATION "
            "but returned %s: CL_INVALID_OPERATION if the AHardwareBuffer "
            "format is not AHARDWAREBUFFER_FORMAT_BLOB\n",
            IGetErrorString(err));
        return TEST_FAIL;
    }

    mem = clCreateBufferWithProperties(context, props_buffer, CL_MEM_READ_WRITE,
                                       0, host_ptr, &err);
    if (CL_INVALID_HOST_PTR != err)
    {
        log_error(
            "clCreateBufferWithProperties should return CL_INVALID_HOST_PTR "
            "but returned %s: CL_INVALID_HOST_PTR if host_ptr is not NULL\n",
            IGetErrorString(err));
        return TEST_FAIL;
    }

    log_info("Testing image error conditions\n");
    // Image error conditions
    mem = clCreateImageWithProperties(context, props_image, CL_MEM_READ_WRITE,
                                      &image_format, nullptr, nullptr, &err);
    if (CL_INVALID_IMAGE_FORMAT_DESCRIPTOR != err)
    {
        log_error(
            "clCreateImageWithProperties should return "
            "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR but returned %s: "
            "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if image_format is not NULL "
            "when using cl_khr_external_memory_android_hardware_buffer.\n",
            IGetErrorString(err));
        return TEST_FAIL;
    }

    mem = clCreateImageWithProperties(context, props_image, CL_MEM_READ_WRITE,
                                      nullptr, &image_desc, nullptr, &err);
    if (CL_INVALID_IMAGE_DESCRIPTOR != err)
    {
        log_error("clCreateImageWithProperties should return "
                  "CL_INVALID_IMAGE_DESCRIPTOR but returned %s: "
                  "CL_INVALID_IMAGE_DESCRIPTOR if image_desc is not NULL when "
                  "using cl_khr_external_memory_android_hardware_buffer.\n",
                  IGetErrorString(err));
        return TEST_FAIL;
    }

    mem = clCreateImageWithProperties(context, props_buffer, CL_MEM_READ_WRITE,
                                      nullptr, nullptr, nullptr, &err);
    if (CL_IMAGE_FORMAT_NOT_SUPPORTED != err)
    {
        log_error("clCreateImageWithProperties should return "
                  "CL_IMAGE_FORMAT_NOT_SUPPORTED but returned %s: "
                  "CL_IMAGE_FORMAT_NOT_SUPPORTED if AHardwareBuffer's format "
                  "is not supported\n",
                  IGetErrorString(err));
        return TEST_FAIL;
    }

    mem = clCreateImageWithProperties(context, props_image, CL_MEM_READ_WRITE,
                                      nullptr, nullptr, host_ptr, &err);
    if (CL_INVALID_HOST_PTR != err)
    {
        log_error(
            "clCreateImageWithProperties should return CL_INVALID_HOST_PTR "
            "but returned %s: CL_INVALID_HOST_PTR if host_ptr is not NULL\n",
            IGetErrorString(err));
        return TEST_FAIL;
    }

    return TEST_PASS;
}

View File

@@ -0,0 +1,26 @@
# Build the cl_khr_external_semaphore_dx_fence conformance tests. These
# exercise D3D12 fence interop and therefore only build on Windows.
if (WIN32)
    include_directories(${CLConform_SOURCE_DIR}/test_common/harness
                        ${CLConform_INCLUDE_DIR})
    link_directories(${CL_LIB_DIR})
    list(APPEND CLConform_LIBRARIES directx_wrapper)

    set(MODULE_NAME CL_KHR_EXTERNAL_SEMAPHORE_DX_FENCE)
    set(${MODULE_NAME}_SOURCES
        main.cpp
        test_external_semaphore_dx_fence.cpp
        test_external_semaphore_dx_fence_negative_wait_signal.cpp
        test_external_semaphore_dx_fence_queries.cpp
        test_external_semaphore_dx_fence_export.cpp
    )

    # Double dereference: ${MODULE_NAME}_SOURCES is only the *name* of the
    # list variable; the properties must be applied to its contents.
    set_source_files_properties(
        ${${MODULE_NAME}_SOURCES}
        PROPERTIES LANGUAGE CXX)

    include_directories(${CMAKE_CURRENT_SOURCE_DIR})
    include_directories("../../common/directx_wrapper")

    include(../../CMakeCommon.txt)
endif (WIN32)

View File

@@ -0,0 +1,22 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
// Entry point: hands every test registered via REGISTER_TEST in this module
// to the shared conformance test harness (no image-format argument parsing,
// default device selection).
int main(int argc, const char *argv[])
{
    return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
                          test_registry::getInstance().definitions(), false, 0);
}

View File

@@ -0,0 +1,117 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#pragma once
#include "harness/typeWrappers.h"
#include "harness/extensionHelpers.h"
#include "harness/errorHelpers.h"
#include "directx_wrapper.hpp"
// RAII helper that imports a D3D12 fence into OpenCL as a
// cl_khr_external_semaphore_dx_fence semaphore. The wrapper owns the shared
// fence HANDLE and the cl_semaphore_khr and releases both on destruction.
// Fixes: members are now null-initialized (the destructor previously read
// uninitialized values when createSemaphoreFromFence() was never called or
// failed early), copy operations are deleted to prevent double release, and
// an unused ID3D12Fence member was removed.
class CLDXSemaphoreWrapper {
public:
    CLDXSemaphoreWrapper(cl_device_id device, cl_context context,
                         ID3D12Device* dx_device)
        : device(device), context(context), dx_device(dx_device){};

    // Creates a shared handle for |fence| and imports it as an OpenCL
    // semaphore of type CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR.
    // Returns CL_SUCCESS on success; fails the test otherwise.
    int createSemaphoreFromFence(ID3D12Fence* fence)
    {
        cl_int errcode = CL_SUCCESS;
        GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);

        const HRESULT hr = dx_device->CreateSharedHandle(
            fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
        test_error(FAILED(hr), "Failed to get shared handle from D3D12 fence");

        cl_semaphore_properties_khr sem_props[] = {
            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
            static_cast<cl_semaphore_properties_khr>(
                CL_SEMAPHORE_TYPE_BINARY_KHR),
            static_cast<cl_semaphore_properties_khr>(
                CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
            reinterpret_cast<cl_semaphore_properties_khr>(fence_handle), 0
        };

        semaphore =
            clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
        test_error(errcode, "Could not create semaphore");
        return CL_SUCCESS;
    }

    ~CLDXSemaphoreWrapper()
    {
        releaseSemaphore();
        if (fence_handle)
        {
            CloseHandle(fence_handle);
        }
    };

    // Non-copyable: copying would double-release the semaphore and handle.
    CLDXSemaphoreWrapper(const CLDXSemaphoreWrapper&) = delete;
    CLDXSemaphoreWrapper& operator=(const CLDXSemaphoreWrapper&) = delete;

    const cl_semaphore_khr* operator&() const { return &semaphore; };
    cl_semaphore_khr operator*() const { return semaphore; };
    HANDLE getHandle() const { return fence_handle; };

private:
    // Null-initialized so the destructor is safe even when
    // createSemaphoreFromFence() was never called (or failed early).
    cl_semaphore_khr semaphore = nullptr;
    HANDLE fence_handle = nullptr;
    cl_device_id device;
    cl_context context;
    ComPtr<ID3D12Device> dx_device;

    // Releases the imported semaphore via the extension entry point.
    int releaseSemaphore() const
    {
        GET_PFN(device, clReleaseSemaphoreKHR);
        if (semaphore)
        {
            clReleaseSemaphoreKHR(semaphore);
        }
        return CL_SUCCESS;
    }
};
// Returns true if |handle_type| appears in the device's
// CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR list, i.e. the device can
// import external semaphores of that type.
// Fix: the parameter was declared cl_external_memory_handle_type_khr, but
// this query yields cl_external_semaphore_handle_type_khr values.
static bool
is_import_handle_available(cl_device_id device,
                           const cl_external_semaphore_handle_type_khr handle_type)
{
    int errcode = CL_SUCCESS;

    // First query the size of the list, then fetch its contents.
    size_t import_types_size = 0;
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, 0,
                        nullptr, &import_types_size);
    if (errcode != CL_SUCCESS)
    {
        log_error("Could not query import semaphore handle types");
        return false;
    }

    std::vector<cl_external_semaphore_handle_type_khr> import_types(
        import_types_size / sizeof(cl_external_semaphore_handle_type_khr));
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
                        import_types_size, import_types.data(), nullptr);
    if (errcode != CL_SUCCESS)
    {
        log_error("Could not query import semaphore handle types");
        return false;
    }

    return std::find(import_types.begin(), import_types.end(), handle_type)
        != import_types.end();
}

View File

@@ -0,0 +1,324 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "semaphore_dx_fence_base.h"
// Confirm that a signal followed by a wait in OpenCL will complete successfully
// Fix: drop the "test_" prefix from the REGISTER_TEST argument — the harness
// macro already prepends it, matching the convention used elsewhere in CTS.
REGISTER_TEST(external_semaphores_signal_wait)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API. GET_PFN also acts as a capability
    // check: it fails the test when an entry point is unavailable.
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper signal_event;
    errcode = clEnqueueSignalSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
    test_error(errcode, "Failed to signal semaphore");

    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to wait semaphore");

    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    // Verify that the events completed.
    test_assert_event_complete(signal_event);
    test_assert_event_complete(wait_event);

    return TEST_PASS;
}
// Confirm that a wait in OpenCL followed by a CPU signal in DX12 will complete
// successfully
// Fix: drop the "test_" prefix from the REGISTER_TEST argument — the harness
// macro already prepends it, matching the convention used elsewhere in CTS.
REGISTER_TEST(external_semaphores_signal_dx_cpu)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API. GET_PFN also acts as a capability
    // check: it fails the test when an entry point is unavailable.
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");

    // Signal from the host CPU; this must unblock the enqueued CL wait.
    log_info("Calling d3d12_fence->Signal()\n");
    const HRESULT hr = (*fence)->Signal(semaphore_payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");

    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    test_assert_event_complete(wait_event);

    return TEST_PASS;
}
// Confirm that a wait in OpenCL followed by a GPU signal in DX12 will complete
// successfully
// Fix: drop the "test_" prefix from the REGISTER_TEST argument — the harness
// macro already prepends it, matching the convention used elsewhere in CTS.
REGISTER_TEST(external_semaphores_signal_dx_gpu)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API. GET_PFN also acts as a capability
    // check: it fails the test when an entry point is unavailable.
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");

    // Signal from the D3D12 GPU command queue; must unblock the CL wait.
    log_info("Calling d3d12_command_queue->Signal()\n");
    const HRESULT hr =
        dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");

    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    test_assert_event_complete(wait_event);

    return TEST_PASS;
}
// Confirm that interlocking waits between OpenCL and DX12 will complete
// successfully
// Fix: drop the "test_" prefix from the REGISTER_TEST argument — the harness
// macro already prepends it, matching the convention used elsewhere in CTS.
REGISTER_TEST(external_semaphores_cl_dx_interlock)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API. GET_PFN also acts as a capability
    // check: it fails the test when an entry point is unavailable.
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    // DX waits for payload 1 (to be signaled by CL), then signals payload 2.
    log_info("Calling d3d12_command_queue->Wait(1)\n");
    cl_semaphore_payload_khr semaphore_payload = 1;
    HRESULT hr =
        dx_wrapper.getDXCommandQueue()->Wait(*fence, semaphore_payload);
    test_error(FAILED(hr), "Failed to wait on D3D12 fence");

    log_info("Calling d3d12_command_queue->Signal(2)\n");
    hr = dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload + 1);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");

    // CL signals payload 1 (releasing the DX wait), then waits for payload 2
    // (released by the DX signal above).
    log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n");
    clEventWrapper signal_event;
    errcode = clEnqueueSignalSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
    test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR");

    log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n");
    semaphore_payload += 1;
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");

    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    test_assert_event_complete(wait_event);
    test_assert_event_complete(signal_event);

    return TEST_PASS;
}
// Confirm that multiple waits in OpenCL followed by signals in DX12 and waits
// in DX12 followed by signals in OpenCL complete successfully
REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;
    // Skip unless both the base external-semaphore extension and the D3D12
    // fence handle-type extension are supported.
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    // The device must be able to import D3D12 fences as OpenCL semaphores.
    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");
    // Import D3D12 fence into OpenCL
    // Two independent fences, each wrapped by its own OpenCL semaphore, so a
    // single enqueue below can wait on / signal multiple semaphores at once.
    const DirectXFenceWrapper fence_1(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore_1(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore_1.createSemaphoreFromFence(*fence_1),
               "Could not create semaphore");
    const DirectXFenceWrapper fence_2(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore_2(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore_2.createSemaphoreFromFence(*fence_2),
               "Could not create semaphore");
    const cl_semaphore_khr semaphore_list[] = { *semaphore_1, *semaphore_2 };
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    // Payloads the OpenCL wait blocks on: fence_1 must reach 1, fence_2
    // must reach 2.
    cl_semaphore_payload_khr semaphore_payload_list[] = {
        semaphore_payload, semaphore_payload + 1
    };
    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    // Enqueue the multi-semaphore wait first; it can only complete once the
    // D3D12 signals below raise both fences to the requested payloads.
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(queue, 2, semaphore_list,
                                         semaphore_payload_list, 0, nullptr,
                                         &wait_event);
    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
    log_info("Calling d3d12_command_queue->Signal()\n");
    // Signal from the D3D12 side, deliberately fence_2 before fence_1, to
    // exercise out-of-order satisfaction of the multi-wait.
    HRESULT hr =
        dx_wrapper.getDXCommandQueue()->Signal(*fence_2, semaphore_payload + 1);
    test_error(FAILED(hr), "Failed to signal D3D12 fence 2");
    hr = dx_wrapper.getDXCommandQueue()->Signal(*fence_1, semaphore_payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence 1");
    log_info("Calling d3d12_command_queue->Wait() with different payloads\n");
    // Queue D3D12-side waits on higher payload values that only the OpenCL
    // signal enqueued later will produce.
    hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_1, semaphore_payload + 3);
    test_error(FAILED(hr), "Failed to wait on D3D12 fence 1");
    hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_2, semaphore_payload + 2);
    test_error(FAILED(hr), "Failed to wait on D3D12 fence 2");
    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");
    test_assert_event_complete(wait_event);
    // Re-use the payload list for the OpenCL-side signal that releases the
    // two D3D12 waits queued above.
    semaphore_payload_list[0] = semaphore_payload + 3;
    semaphore_payload_list[1] = semaphore_payload + 2;
    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
    clEventWrapper signal_event;
    errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list,
                                           semaphore_payload_list, 0, nullptr,
                                           &signal_event);
    test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR");
    // Wait until the GPU has completed commands up to this fence point.
    log_info("Waiting for D3D12 command queue completion\n");
    if ((*fence_1)->GetCompletedValue() < semaphore_payload_list[0])
    {
        // Block on a Win32 event until fence_1 reaches the signaled payload.
        const HANDLE event_handle =
            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
        hr = (*fence_1)->SetEventOnCompletion(semaphore_payload_list[0],
                                              event_handle);
        test_error(FAILED(hr),
                   "Failed to set D3D12 fence 1 event on completion");
        WaitForSingleObject(event_handle, INFINITE);
        CloseHandle(event_handle);
    }
    if ((*fence_2)->GetCompletedValue() < semaphore_payload_list[1])
    {
        // Same completion wait for fence_2.
        const HANDLE event_handle =
            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
        hr = (*fence_2)->SetEventOnCompletion(semaphore_payload_list[1],
                                              event_handle);
        test_error(FAILED(hr),
                   "Failed to set D3D12 fence 2 event on completion");
        WaitForSingleObject(event_handle, INFINITE);
        CloseHandle(event_handle);
    }
    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");
    test_assert_event_complete(signal_event);
    return TEST_PASS;
}

View File

@@ -0,0 +1,220 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "semaphore_dx_fence_base.h"
// Confirm that a wait followed by a signal in DirectX 12 using an exported
// semaphore will complete successfully
// Exports an OpenCL binary semaphore as a D3D12 fence, enqueues an OpenCL
// wait on it, then signals the fence from the D3D12 side and verifies the
// wait completes.
REGISTER_TEST(test_external_semaphores_export_dx_signal)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    GET_PFN(device, clGetSemaphoreInfoKHR);
    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
    // Query the device's exportable semaphore handle types; this test needs
    // CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR to be among them.
    size_t export_types_size = 0;
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
                        nullptr, &export_types_size);
    test_error(errcode, "Could not query export semaphore handle types");
    std::vector<cl_external_semaphore_handle_type_khr> export_types(
        export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
                        export_types_size, export_types.data(), nullptr);
    test_error(errcode, "Could not query export semaphore handle types");
    if (std::find(export_types.begin(), export_types.end(),
                  CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
        == export_types.end())
    {
        log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
                 "the supported export types\n");
        return TEST_FAIL;
    }
    // Create a binary semaphore declared exportable as a D3D12 fence.
    constexpr cl_semaphore_properties_khr sem_props[] = {
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
        0
    };
    cl_semaphore_khr semaphore =
        clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
    test_error(errcode, "Could not create semaphore");
    cl_bool is_exportable = CL_FALSE;
    errcode =
        clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
                              sizeof(is_exportable), &is_exportable, nullptr);
    test_error(errcode, "Could not get semaphore info");
    test_error(!is_exportable, "Semaphore is not exportable");
    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    // Enqueue the OpenCL wait first; it can only complete once the D3D12
    // side signals the exported fence to the requested payload.
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to wait semaphore");
    // Export the semaphore as a shared handle and open it as a D3D12 fence.
    HANDLE semaphore_handle = nullptr;
    errcode = clGetSemaphoreHandleForTypeKHR(
        semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
        sizeof(semaphore_handle), &semaphore_handle, nullptr);
    test_error(errcode, "Could not get semaphore handle");
    ID3D12Fence *fence = nullptr;
    // OpenSharedHandle returns an HRESULT, not a cl_int; check it with
    // FAILED() like every other D3D12 call in this file instead of routing
    // it through the OpenCL errcode path.
    HRESULT hr = dx_wrapper.getDXDevice()->OpenSharedHandle(
        semaphore_handle, IID_PPV_ARGS(&fence));
    test_error(FAILED(hr), "Could not open semaphore handle");
    log_info("Calling fence->Signal()\n");
    hr = fence->Signal(semaphore_payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");
    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");
    test_assert_event_complete(wait_event);
    // Release resources
    CloseHandle(semaphore_handle);
    test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
    fence->Release();
    return TEST_PASS;
}
// Confirm that a signal in OpenCL followed by a wait in DirectX 12 using an
// exported semaphore will complete successfully
// Exports an OpenCL binary semaphore as a D3D12 fence, signals it from
// OpenCL, then waits on the fence from the D3D12 side and verifies the
// signal completes.
REGISTER_TEST(test_external_semaphores_export_dx_wait)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    GET_PFN(device, clGetSemaphoreInfoKHR);
    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
    // Query the device's exportable semaphore handle types; this test needs
    // CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR to be among them.
    size_t export_types_size = 0;
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
                        nullptr, &export_types_size);
    test_error(errcode, "Could not query export semaphore handle types");
    std::vector<cl_external_semaphore_handle_type_khr> export_types(
        export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
                        export_types_size, export_types.data(), nullptr);
    test_error(errcode, "Could not query export semaphore handle types");
    if (std::find(export_types.begin(), export_types.end(),
                  CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
        == export_types.end())
    {
        log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
                 "the supported export types\n");
        return TEST_FAIL;
    }
    // Create a binary semaphore declared exportable as a D3D12 fence.
    constexpr cl_semaphore_properties_khr sem_props[] = {
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
        0
    };
    cl_semaphore_khr semaphore =
        clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
    test_error(errcode, "Could not create semaphore");
    cl_bool is_exportable = CL_FALSE;
    errcode =
        clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
                              sizeof(is_exportable), &is_exportable, nullptr);
    test_error(errcode, "Could not get semaphore info");
    test_error(!is_exportable, "Semaphore is not exportable");
    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
    // Signal from the OpenCL side; the D3D12 wait below consumes it.
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper signal_event;
    errcode = clEnqueueSignalSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
    test_error(errcode, "Failed to signal semaphore");
    // Export the semaphore as a shared handle and open it as a D3D12 fence.
    HANDLE semaphore_handle = nullptr;
    errcode = clGetSemaphoreHandleForTypeKHR(
        semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
        sizeof(semaphore_handle), &semaphore_handle, nullptr);
    test_error(errcode, "Could not get semaphore handle");
    ID3D12Fence *fence = nullptr;
    // OpenSharedHandle returns an HRESULT, not a cl_int; check it with
    // FAILED() like every other D3D12 call in this file instead of routing
    // it through the OpenCL errcode path.
    HRESULT hr = dx_wrapper.getDXDevice()->OpenSharedHandle(
        semaphore_handle, IID_PPV_ARGS(&fence));
    test_error(FAILED(hr), "Could not open semaphore handle");
    log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n");
    hr = dx_wrapper.getDXCommandQueue()->Wait(fence, semaphore_payload);
    test_error(FAILED(hr), "Failed to wait on D3D12 fence");
    log_info("Calling WaitForSingleObject\n");
    // Block on a Win32 event until the fence reaches the signaled payload.
    if (fence->GetCompletedValue() < semaphore_payload)
    {
        const HANDLE event =
            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
        hr = fence->SetEventOnCompletion(semaphore_payload, event);
        test_error(FAILED(hr), "Failed to set event on completion");
        WaitForSingleObject(event, INFINITE);
        CloseHandle(event);
    }
    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");
    test_assert_event_complete(signal_event);
    // Release resources
    CloseHandle(semaphore_handle);
    test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
    fence->Release();
    return TEST_PASS;
}

View File

@@ -0,0 +1,89 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "semaphore_dx_fence_base.h"
// Confirm that a wait without a semaphore payload list will return
// CL_INVALID_VALUE
REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    // The device must be able to import D3D12 fences as OpenCL semaphores.
    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");
    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");
    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    // Passing a null payload list for an imported D3D12 fence semaphore is
    // invalid; the implementation must reject it with CL_INVALID_VALUE.
    errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
                                         nullptr, nullptr);
    test_assert_error(
        errcode == CL_INVALID_VALUE,
        "Unexpected error code returned from clEnqueueWaitSemaphores");
    return TEST_PASS;
}
// Confirm that a signal without a semaphore payload list will return
// CL_INVALID_VALUE
// Negative test: signaling an imported D3D12-fence semaphore without a
// payload list must fail with CL_INVALID_VALUE.
REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    // The device must be able to import D3D12 fences as OpenCL semaphores.
    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");
    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");
    // Fixed copy-paste in the log message: this test signals, it does not
    // wait.
    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
    // Passing a null payload list is invalid for this semaphore type; the
    // implementation must reject it with CL_INVALID_VALUE.
    errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
                                           nullptr, nullptr);
    test_assert_error(
        errcode == CL_INVALID_VALUE,
        "Unexpected error code returned from clEnqueueSignalSemaphores");
    return TEST_PASS;
}

View File

@@ -0,0 +1,69 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "semaphore_dx_fence_base.h"
// Confirm that the CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR property is in the
// properties returned by clGetSemaphoreInfo
// Confirms that CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR and the imported fence
// handle appear as an adjacent key/value pair in the properties returned by
// clGetSemaphoreInfoKHR(CL_SEMAPHORE_PROPERTIES_KHR).
REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clGetSemaphoreInfoKHR);
    // The device must be able to import D3D12 fences as OpenCL semaphores.
    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");
    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");
    // Two-step query: first the size in bytes, then the property list.
    size_t properties_size_bytes = 0;
    errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR, 0,
                                    nullptr, &properties_size_bytes);
    test_error(errcode, "Could not get semaphore info");
    std::vector<cl_semaphore_properties_khr> semaphore_properties(
        properties_size_bytes / sizeof(cl_semaphore_properties_khr));
    errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
                                    properties_size_bytes,
                                    semaphore_properties.data(), nullptr);
    test_error(errcode, "Could not get semaphore info");
    // Scan adjacent (key, value) pairs. Guard with `i + 1 < size()` rather
    // than `i < size() - 1`: size() is unsigned, so `size() - 1` wraps
    // around when the returned property list is empty, which would read out
    // of bounds.
    for (size_t i = 0; i + 1 < semaphore_properties.size(); i++)
    {
        if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR
            && semaphore_properties[i + 1]
                == reinterpret_cast<cl_semaphore_properties_khr>(
                       semaphore.getHandle()))
        {
            return TEST_PASS;
        }
    }
    log_error(
        "Failed to find the dx fence handle type in the semaphore properties");
    return TEST_FAIL;
}

View File

@@ -35,7 +35,7 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
int wgid = get_group_id(0);
int wgsize = get_local_size(0);
if (tid == 0) atomic_store(localPtr, 0);
if (tid == 0) atomic_store_explicit(localPtr, 0, memory_order_relaxed, memory_scope_work_group);
barrier(CLK_LOCAL_MEM_FENCE);
@@ -47,12 +47,12 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
if ((wgid % 2) == 0)
ptr = localPtr;
int inc = atomic_fetch_add(ptr, 1);
int inc = atomic_fetch_add_explicit(ptr, 1, memory_order_relaxed, memory_scope_work_group);
// In the cases where the local memory ptr was used,
// save off the final value.
if ((wgid % 2) == 0 && inc == (wgsize-1))
atomic_store(&globalPtr[wgid], inc);
atomic_store_explicit(&globalPtr[wgid], inc, memory_order_relaxed, memory_scope_work_group);
}
)OpenCLC";
@@ -67,7 +67,7 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
int wgid = get_group_id(0);
int wgsize = get_local_size(0);
if (tid == 0) atomic_store(localPtr, 0);
if (tid == 0) atomic_store_explicit(localPtr, 0, memory_order_relaxed, memory_scope_work_group);
barrier(CLK_LOCAL_MEM_FENCE);
@@ -79,14 +79,17 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
if ((tid % 2) == 0)
ptr = localPtr;
atomic_fetch_add(ptr, 1);
atomic_fetch_add_explicit(ptr, 1, memory_order_relaxed, memory_scope_work_group);
barrier(CLK_LOCAL_MEM_FENCE);
// In the cases where the local memory ptr was used,
// save off the final value.
if (tid == 0)
atomic_store(&globalPtr[(wgid * 2) + 1], atomic_load(localPtr));
atomic_store_explicit(&globalPtr[(wgid * 2) + 1],
atomic_load_explicit(localPtr, memory_order_relaxed, memory_scope_work_group),
memory_order_relaxed,
memory_scope_work_group);
}
)OpenCLC";
}

View File

@@ -200,7 +200,7 @@ int main(int argc, const char *argv[])
}
}
if (argc > 1 && strcmp(argv[1], "-list") == 0)
if (gListTests)
{
log_info("Available 2.x tests:\n");
for (int i = 0; i < test_num; i++)

BIN
test_conformance/spir/half.zip Normal file → Executable file

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -43,35 +43,37 @@
//
// Task
//
Task::Task(cl_device_id device, const char* options):
m_devid(device) {
if (options)
m_options = options;
Task::Task(cl_device_id device, const char* options): m_devid(device)
{
if (options) m_options = options;
}
Task::~Task() {}
const char* Task::getErrorLog() const {
return m_log.c_str();
}
const char* Task::getErrorLog() const { return m_log.c_str(); }
void Task::setErrorLog(cl_program prog) {
void Task::setErrorLog(cl_program prog)
{
size_t len = 0;
std::vector<char> log;
cl_int err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);
if(err_code != CL_SUCCESS)
cl_int err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG,
0, NULL, &len);
if (err_code != CL_SUCCESS)
{
m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &len) failed.\n";
m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &len) "
"failed.\n";
return;
}
log.resize(len, 0);
err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, len, &log[0], NULL);
if(err_code != CL_SUCCESS)
err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, len,
&log[0], NULL);
if (err_code != CL_SUCCESS)
{
m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &log) failed.\n";
m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &log) "
"failed.\n";
return;
}
m_log.append(&log[0]);
@@ -84,10 +86,11 @@ BuildTask::BuildTask(cl_program prog, cl_device_id dev, const char* options)
: Task(dev, options), m_program(prog)
{}
bool BuildTask::execute() {
cl_int err_code = clBuildProgram(m_program, 0, NULL, m_options.c_str(), NULL, NULL);
if(CL_SUCCESS == err_code)
return true;
bool BuildTask::execute()
{
cl_int err_code =
clBuildProgram(m_program, 0, NULL, m_options.c_str(), NULL, NULL);
if (CL_SUCCESS == err_code) return true;
setErrorLog(m_program);
return false;
@@ -96,8 +99,10 @@ bool BuildTask::execute() {
//
// SpirBuildTask
//
SpirBuildTask::SpirBuildTask(cl_program prog, cl_device_id dev, const char* options) :
BuildTask(prog, dev, options) {}
SpirBuildTask::SpirBuildTask(cl_program prog, cl_device_id dev,
const char* options)
: BuildTask(prog, dev, options)
{}
//
// CompileTask
@@ -107,47 +112,43 @@ CompileTask::CompileTask(cl_program prog, cl_device_id dev, const char* options)
: Task(dev, options), m_program(prog)
{}
void CompileTask::addHeader(const char* hname, cl_program hprog) {
void CompileTask::addHeader(const char* hname, cl_program hprog)
{
m_headers.push_back(std::make_pair(hname, hprog));
}
const char* first(std::pair<const char*,cl_program>& p) {
return p.first;
}
const char* first(std::pair<const char*, cl_program>& p) { return p.first; }
cl_program second(const std::pair<const char*, cl_program>& p) {
cl_program second(const std::pair<const char*, cl_program>& p)
{
return p.second;
}
bool CompileTask::execute() {
bool CompileTask::execute()
{
// Generating the header names vector.
std::vector<const char*> names;
std::transform(m_headers.begin(), m_headers.end(), names.begin(), first);
// Generating the header programs vector.
std::vector<cl_program> programs;
std::transform(m_headers.begin(), m_headers.end(), programs.begin(), second);
std::transform(m_headers.begin(), m_headers.end(), programs.begin(),
second);
const char** h_names = NULL;
const cl_program* h_programs = NULL;
if (!m_headers.empty())
{
h_programs = &programs[0];
h_names = &names[0];
h_names = &names[0];
}
// Compiling with the headers.
cl_int err_code = clCompileProgram(
m_program,
1U,
&m_devid,
m_options.c_str(),
m_headers.size(), // # of headers
h_programs,
h_names,
NULL, NULL);
if (CL_SUCCESS == err_code)
return true;
cl_int err_code =
clCompileProgram(m_program, 1U, &m_devid, m_options.c_str(),
m_headers.size(), // # of headers
h_programs, h_names, NULL, NULL);
if (CL_SUCCESS == err_code) return true;
setErrorLog(m_program);
return false;
@@ -156,8 +157,10 @@ bool CompileTask::execute() {
//
// SpirCompileTask
//
SpirCompileTask::SpirCompileTask(cl_program prog, cl_device_id dev, const char* options) :
CompileTask(prog, dev, options) {}
SpirCompileTask::SpirCompileTask(cl_program prog, cl_device_id dev,
const char* options)
: CompileTask(prog, dev, options)
{}
//
@@ -169,13 +172,16 @@ LinkTask::LinkTask(cl_program* programs, int num_programs, cl_context ctxt,
m_numPrograms(num_programs), m_context(ctxt)
{}
bool LinkTask::execute() {
bool LinkTask::execute()
{
cl_int err_code;
int i;
for(i = 0; i < m_numPrograms; ++i)
for (i = 0; i < m_numPrograms; ++i)
{
err_code = clCompileProgram(m_programs[i], 1, &m_devid, "-x spir -spir-std=1.2 -cl-kernel-arg-info", 0, NULL, NULL, NULL, NULL);
err_code = clCompileProgram(m_programs[i], 1, &m_devid,
"-x spir -spir-std=1.2 -cl-kernel-arg-info",
0, NULL, NULL, NULL, NULL);
if (CL_SUCCESS != err_code)
{
setErrorLog(m_programs[i]);
@@ -183,91 +189,78 @@ bool LinkTask::execute() {
}
}
m_executable = clLinkProgram(m_context, 1, &m_devid, m_options.c_str(), m_numPrograms, m_programs, NULL, NULL, &err_code);
if (CL_SUCCESS == err_code)
return true;
m_executable =
clLinkProgram(m_context, 1, &m_devid, m_options.c_str(), m_numPrograms,
m_programs, NULL, NULL, &err_code);
if (CL_SUCCESS == err_code) return true;
if(m_executable) setErrorLog(m_executable);
if (m_executable) setErrorLog(m_executable);
return false;
}
cl_program LinkTask::getExecutable() const {
return m_executable;
}
cl_program LinkTask::getExecutable() const { return m_executable; }
LinkTask::~LinkTask() {
if(m_executable) clReleaseProgram(m_executable);
LinkTask::~LinkTask()
{
if (m_executable) clReleaseProgram(m_executable);
}
//
// KernelEnumerator
//
void KernelEnumerator::process(cl_program prog) {
void KernelEnumerator::process(cl_program prog)
{
const size_t MAX_KERNEL_NAME = 64;
size_t num_kernels;
cl_int err_code = clGetProgramInfo(
prog,
CL_PROGRAM_NUM_KERNELS,
sizeof(size_t),
&num_kernels,
NULL
);
if (CL_SUCCESS != err_code)
return;
cl_int err_code = clGetProgramInfo(prog, CL_PROGRAM_NUM_KERNELS,
sizeof(size_t), &num_kernels, NULL);
if (CL_SUCCESS != err_code) return;
// Querying for the number of kernels.
size_t buffer_len = sizeof(char)*num_kernels*MAX_KERNEL_NAME;
size_t buffer_len = sizeof(char) * num_kernels * MAX_KERNEL_NAME;
char* kernel_names = new char[buffer_len];
memset(kernel_names, '\0', buffer_len);
size_t str_len = 0;
err_code = clGetProgramInfo(
prog,
CL_PROGRAM_KERNEL_NAMES,
buffer_len,
(void *)kernel_names,
&str_len
);
if (CL_SUCCESS != err_code)
return;
err_code = clGetProgramInfo(prog, CL_PROGRAM_KERNEL_NAMES, buffer_len,
(void*)kernel_names, &str_len);
if (CL_SUCCESS != err_code) return;
//parsing the names and inserting them to the list
// parsing the names and inserting them to the list
std::string names(kernel_names);
assert (str_len == 1+names.size() && "incompatible string lengths");
assert(str_len == 1 + names.size() && "incompatible string lengths");
size_t offset = 0;
for(size_t i=0 ; i<names.size() ; ++i){
//kernel names are separated by semi colons
if (names[i] == ';'){
m_kernels.push_back(names.substr(offset, i-offset));
offset = i+1;
for (size_t i = 0; i < names.size(); ++i)
{
// kernel names are separated by semi colons
if (names[i] == ';')
{
m_kernels.push_back(names.substr(offset, i - offset));
offset = i + 1;
}
}
m_kernels.push_back(names.substr(offset, names.size()-offset));
m_kernels.push_back(names.substr(offset, names.size() - offset));
delete[] kernel_names;
}
KernelEnumerator::KernelEnumerator(cl_program prog) {
process(prog);
}
KernelEnumerator::KernelEnumerator(cl_program prog) { process(prog); }
KernelEnumerator::iterator KernelEnumerator::begin(){
KernelEnumerator::iterator KernelEnumerator::begin()
{
return m_kernels.begin();
}
KernelEnumerator::iterator KernelEnumerator::end(){
return m_kernels.end();
}
KernelEnumerator::iterator KernelEnumerator::end() { return m_kernels.end(); }
size_t KernelEnumerator::size() const {
return m_kernels.size();
}
size_t KernelEnumerator::size() const { return m_kernels.size(); }
/**
Run the single test - run the test for both CL and SPIR versions of the kernel
*/
static bool run_test(cl_context context, cl_command_queue queue, cl_program clprog,
cl_program bcprog, const std::string& kernel_name, std::string& err, const cl_device_id device,
float ulps)
static bool run_test(cl_context context, cl_command_queue queue,
cl_program clprog, cl_program bcprog,
const std::string& kernel_name, std::string& err,
const cl_device_id device, float ulps)
{
WorkSizeInfo ws;
TestResult cl_result;
@@ -276,28 +269,29 @@ static bool run_test(cl_context context, cl_command_queue queue, cl_program clpr
{
// make sure that the kernel will be released before the program
clKernelWrapper kernel = create_kernel_helper(clprog, kernel_name);
// based on the kernel characteristics, we are generating and initializing the arguments for both phases (cl and bc executions)
// based on the kernel characteristics, we are generating and
// initializing the arguments for both phases (cl and bc executions)
generate_kernel_data(context, kernel, ws, cl_result);
bc_result.reset(cl_result.clone(context, ws, kernel, device));
assert (compare_results(cl_result, *bc_result, ulps) && "not equal?");
run_kernel( kernel, queue, ws, cl_result );
assert(compare_results(cl_result, *bc_result, ulps) && "not equal?");
run_kernel(kernel, queue, ws, cl_result);
}
// now, run the single BC test
{
// make sure that the kernel will be released before the program
clKernelWrapper kernel = create_kernel_helper(bcprog, kernel_name);
run_kernel( kernel, queue, ws, *bc_result );
run_kernel(kernel, queue, ws, *bc_result);
}
int error = clFinish(queue);
if( CL_SUCCESS != error)
if (CL_SUCCESS != error)
{
err = "clFinish failed\n";
return false;
}
// compare the results
if( !compare_results(cl_result, *bc_result, ulps) )
if (!compare_results(cl_result, *bc_result, ulps))
{
err = " (result diff in kernel '" + kernel_name + "').";
return false;
@@ -308,16 +302,16 @@ static bool run_test(cl_context context, cl_command_queue queue, cl_program clpr
/**
Get the maximum relative error defined as ULP of floating-point math functions
*/
static float get_max_ulps(const char *test_name)
static float get_max_ulps(const char* test_name)
{
float ulps = 0.f;
// Get ULP values from math_brute_force functionList
if (strstr(test_name, "math_kernel"))
{
for( size_t i = 0; i < functionListCount; i++ )
for (size_t i = 0; i < functionListCount; i++)
{
char name[64];
const Func *func = &functionList[ i ];
const Func* func = &functionList[i];
sprintf(name, ".%s_float", func->name);
if (strstr(test_name, name))
{
@@ -336,16 +330,17 @@ static float get_max_ulps(const char *test_name)
return ulps;
}
TestRunner::TestRunner(EventHandler *success, EventHandler *failure,
const OclExtensions& devExt):
m_successHandler(success), m_failureHandler(failure), m_devExt(&devExt) {}
TestRunner::TestRunner(EventHandler* success, EventHandler* failure,
const OclExtensions& devExt)
: m_successHandler(success), m_failureHandler(failure), m_devExt(&devExt)
{}
/**
Based on the test name build the cl file name, the bc file name and execute
the kernel for both modes (cl and bc).
*/
bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
const char *test_name, cl_uint size_t_width)
bool TestRunner::runBuildTest(cl_device_id device, const char* folder,
const char* test_name, cl_uint size_t_width)
{
int failures = 0;
// Composing the name of the CSV file.
@@ -365,28 +360,35 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
cl_bool images3D = khrDb.isImages3DRequired(folder, test_name);
char deviceProfile[64];
clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(deviceProfile), &deviceProfile, NULL);
clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(deviceProfile),
&deviceProfile, NULL);
std::string device_profile(deviceProfile, 64);
if(images == CL_TRUE && checkForImageSupport(device) != 0)
if (images == CL_TRUE && checkForImageSupport(device) != 0)
{
(*m_successHandler)(test_name, "");
std::cout << "Skipped. (Cannot run on device due to Images is not supported)." << std::endl;
std::cout
<< "Skipped. (Cannot run on device due to Images is not supported)."
<< std::endl;
return true;
}
if(images3D == CL_TRUE && checkFor3DImageSupport(device) != 0)
if (images3D == CL_TRUE && checkFor3DImageSupport(device) != 0)
{
(*m_successHandler)(test_name, "");
std::cout << "Skipped. (Cannot run on device as 3D images are not supported)." << std::endl;
std::cout
<< "Skipped. (Cannot run on device as 3D images are not supported)."
<< std::endl;
return true;
}
OclExtensions requiredExt = khrDb.getRequiredExtensions(folder, test_name);
if(!m_devExt->supports(requiredExt))
if (!m_devExt->supports(requiredExt))
{
(*m_successHandler)(test_name, "");
std::cout << "Skipped. (Cannot run on device due to missing extensions: " << m_devExt->get_missing(requiredExt) << " )." << std::endl;
std::cout
<< "Skipped. (Cannot run on device due to missing extensions: "
<< m_devExt->get_missing(requiredExt) << " )." << std::endl;
return true;
}
@@ -409,17 +411,26 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
cl_device_fp_config gFloatCapabilities = 0;
cl_int err;
if ((err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(gFloatCapabilities), &gFloatCapabilities, NULL)))
if ((err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
sizeof(gFloatCapabilities), &gFloatCapabilities,
NULL)))
{
log_info("Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n", err);
log_info("Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
err);
}
if (strstr(test_name, "div_cr") || strstr(test_name, "sqrt_cr")) {
if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0) {
if (strstr(test_name, "div_cr") || strstr(test_name, "sqrt_cr"))
{
if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
{
(*m_successHandler)(test_name, "");
std::cout << "Skipped. (Cannot run on device due to missing CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT property.)" << std::endl;
std::cout << "Skipped. (Cannot run on device due to missing "
"CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT property.)"
<< std::endl;
return true;
} else {
}
else
{
bcoptions += " -cl-fp32-correctly-rounded-divide-sqrt";
cloptions += " -cl-fp32-correctly-rounded-divide-sqrt";
}
@@ -427,33 +438,39 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
// Building the programs.
BuildTask clBuild(clprog, device, cloptions.c_str());
if (!clBuild.execute()) {
if (!clBuild.execute())
{
std::cerr << clBuild.getErrorLog() << std::endl;
(*m_failureHandler)(test_name, "");
return false;
}
SpirBuildTask bcBuild(bcprog, device, bcoptions.c_str());
if (!bcBuild.execute()) {
if (!bcBuild.execute())
{
std::cerr << bcBuild.getErrorLog() << std::endl;
(*m_failureHandler)(test_name, "");
return false;
}
KernelEnumerator clkernel_enumerator(clprog),
bckernel_enumerator(bcprog);
if (clkernel_enumerator.size() != bckernel_enumerator.size()) {
KernelEnumerator clkernel_enumerator(clprog), bckernel_enumerator(bcprog);
if (clkernel_enumerator.size() != bckernel_enumerator.size())
{
std::cerr << "number of kernels in test" << test_name
<< " doesn't match in bc and cl files" << std::endl;
(*m_failureHandler)(test_name, "");
return false;
}
KernelEnumerator::iterator it = clkernel_enumerator.begin(),
e = clkernel_enumerator.end();
e = clkernel_enumerator.end();
while (it != e)
{
std::string kernel_name = *it++;
std::string err;
try
{
bool success = run_test(context, queue, clprog, bcprog, kernel_name, err, device, ulps);
bool success = run_test(context, queue, clprog, bcprog, kernel_name,
err, device, ulps);
if (success)
{
log_info("kernel '%s' passed.\n", kernel_name.c_str());
@@ -468,7 +485,8 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
} catch (const std::runtime_error& err)
{
++failures;
log_info("kernel '%s' failed: %s\n", kernel_name.c_str(), err.what());
log_info("kernel '%s' failed: %s\n", kernel_name.c_str(),
err.what());
(*m_failureHandler)(test_name, kernel_name);
}
}
@@ -476,4 +494,3 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
log_info("%s %s\n", test_name, failures ? "FAILED" : "passed.");
return failures == 0;
}

View File

@@ -17,7 +17,7 @@ else()
add_custom_command(
OUTPUT ${VULKAN_TEST_RESOURCES}/buffer.spv
COMMAND ${Vulkan_glslang_binary}
--target-env vulkan1.0
--target-env vulkan1.1
-o ${VULKAN_TEST_RESOURCES}/buffer.spv
${CMAKE_CURRENT_SOURCE_DIR}/buffer.comp
DEPENDS buffer.comp
@@ -35,7 +35,7 @@ else()
add_custom_command(
OUTPUT ${VULKAN_TEST_RESOURCES}/image2D_${GLSL_FORMAT}.spv
COMMAND ${Vulkan_glslang_binary}
--target-env vulkan1.0
--target-env vulkan1.1
-o ${VULKAN_TEST_RESOURCES}/image2D_${GLSL_FORMAT}.spv
${CMAKE_CURRENT_BINARY_DIR}/image2D_${GLSL_FORMAT}.comp
DEPENDS image2D_${GLSL_FORMAT}.comp