mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Merge branch 'main' into cl_khr_unified_svm
This commit is contained in:
@@ -25,11 +25,18 @@ if(USE_CL_EXPERIMENTAL)
|
||||
add_definitions(-DCL_EXPERIMENTAL)
|
||||
endif(USE_CL_EXPERIMENTAL)
|
||||
|
||||
option(SANITIZER_THREAD "Build with the thread sanitiser" OFF)
|
||||
if (SANITIZER_THREAD)
|
||||
add_compile_options(-fsanitize=thread)
|
||||
add_link_options(-fsanitize=thread)
|
||||
endif (SANITIZER_THREAD)
|
||||
|
||||
#-----------------------------------------------------------
|
||||
# Default Configurable Test Set
|
||||
#-----------------------------------------------------------
|
||||
option(D3D10_IS_SUPPORTED "Run DirectX 10 interop tests" OFF)
|
||||
option(D3D11_IS_SUPPORTED "Run DirectX 11 interop tests" OFF)
|
||||
option(D3D12_IS_SUPPORTED "Run DirectX 12 interop tests" OFF)
|
||||
option(GL_IS_SUPPORTED "Run OpenGL interop tests" OFF)
|
||||
option(GLES_IS_SUPPORTED "Run OpenGL ES interop tests" OFF)
|
||||
option(VULKAN_IS_SUPPORTED "Run Vulkan interop tests" OFF)
|
||||
|
||||
@@ -137,10 +137,10 @@ uint32_t get_channel_order_channel_count(cl_channel_order order)
|
||||
case CL_RGx: return 2;
|
||||
|
||||
case CL_RGB:
|
||||
case CL_RGBx:
|
||||
case CL_sRGB:
|
||||
case CL_sRGBx: return 3;
|
||||
case CL_sRGB: return 3;
|
||||
|
||||
case CL_RGBx:
|
||||
case CL_sRGBx:
|
||||
case CL_RGBA:
|
||||
case CL_ARGB:
|
||||
case CL_BGRA:
|
||||
|
||||
@@ -39,4 +39,20 @@ inline std::string str_sprintf(const std::string &str, Args... args)
|
||||
return std::string(buffer.get(), buffer.get() + s - 1);
|
||||
}
|
||||
|
||||
// Returns the argument, converted to std::string.
|
||||
// The return type of std::filesystem::path::u8string() was
|
||||
// std::string in C++17, but became std::u8string in C++20.
|
||||
// Use this method to wrap the result when a std::string
|
||||
// is desired.
|
||||
//
|
||||
// Use a template with a specialization for std::string,
|
||||
// so the generic template applies when std::u8string exists
|
||||
// and is used.
|
||||
template <typename STRING_TYPE>
|
||||
inline std::string to_string(const STRING_TYPE &str)
|
||||
{
|
||||
return std::string(str.begin(), str.end());
|
||||
}
|
||||
inline std::string to_string(const std::string &str) { return str; }
|
||||
|
||||
#endif // STRING_HELPERS_H
|
||||
|
||||
@@ -56,6 +56,9 @@ if(VULKAN_IS_SUPPORTED)
|
||||
add_subdirectory( common/vulkan_wrapper )
|
||||
add_subdirectory( vulkan )
|
||||
endif()
|
||||
if(D3D12_IS_SUPPORTED)
|
||||
add_subdirectory(common/directx_wrapper)
|
||||
endif ()
|
||||
|
||||
file(GLOB CSV_FILES "opencl_conformance_tests_*.csv")
|
||||
|
||||
|
||||
@@ -15,142 +15,167 @@
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
typedef struct {
|
||||
cl_int *pA;
|
||||
cl_int *pB;
|
||||
cl_int *pC;
|
||||
typedef struct
|
||||
{
|
||||
cl_int* pA;
|
||||
cl_int* pB;
|
||||
cl_int* pC;
|
||||
} BufPtrs;
|
||||
|
||||
const char *set_kernel_exec_info_svm_ptrs_kernel[] = {
|
||||
"struct BufPtrs;\n"
|
||||
"\n"
|
||||
"typedef struct {\n"
|
||||
" __global int *pA;\n"
|
||||
" __global int *pB;\n"
|
||||
" __global int *pC;\n"
|
||||
"} BufPtrs;\n"
|
||||
"\n"
|
||||
"__kernel void set_kernel_exec_info_test(__global BufPtrs* pBufs)\n"
|
||||
"{\n"
|
||||
" size_t i;\n"
|
||||
" i = get_global_id(0);\n"
|
||||
" pBufs->pA[i]++;\n"
|
||||
" pBufs->pB[i]++;\n"
|
||||
" pBufs->pC[i]++;\n"
|
||||
"}\n"
|
||||
const char* set_kernel_exec_info_svm_ptrs_kernel[] = {
|
||||
"struct BufPtrs;\n"
|
||||
"\n"
|
||||
"typedef struct {\n"
|
||||
" __global int *pA;\n"
|
||||
" __global int *pB;\n"
|
||||
" __global int *pC;\n"
|
||||
"} BufPtrs;\n"
|
||||
"\n"
|
||||
"__kernel void set_kernel_exec_info_test(__global BufPtrs* pBufs)\n"
|
||||
"{\n"
|
||||
" size_t i;\n"
|
||||
" i = get_global_id(0);\n"
|
||||
" pBufs->pA[i]++;\n"
|
||||
" pBufs->pB[i]++;\n"
|
||||
" pBufs->pC[i]++;\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
|
||||
// Test that clSetKernelExecInfo works correctly with
|
||||
// CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
|
||||
//
|
||||
REGISTER_TEST(svm_set_kernel_exec_info_svm_ptrs)
|
||||
{
|
||||
clContextWrapper c = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
clContextWrapper c = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
// error = create_cl_objects(device, &set_kernel_exec_info_svm_ptrs_kernel[0],
|
||||
// &context, &program, &q, &num_devices, CL_DEVICE_SVM_FINE_GRAIN);
|
||||
error = create_cl_objects(device, &set_kernel_exec_info_svm_ptrs_kernel[0],
|
||||
&c, &program, &queues[0], &num_devices,
|
||||
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(error < 0) return -1; // fail test.
|
||||
// error = create_cl_objects(device,
|
||||
// &set_kernel_exec_info_svm_ptrs_kernel[0], &context, &program, &q,
|
||||
// &num_devices, CL_DEVICE_SVM_FINE_GRAIN);
|
||||
error = create_cl_objects(device, &set_kernel_exec_info_svm_ptrs_kernel[0],
|
||||
&c, &program, &queues[0], &num_devices,
|
||||
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if (error == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (error < 0) return -1; // fail test.
|
||||
|
||||
|
||||
clKernelWrapper k = clCreateKernel(program, "set_kernel_exec_info_test", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
clKernelWrapper k =
|
||||
clCreateKernel(program, "set_kernel_exec_info_test", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
size_t size = num_elements*sizeof(int);
|
||||
//int* pA = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
|
||||
//int* pB = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
|
||||
//int* pC = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
|
||||
int* pA = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
|
||||
int* pB = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
|
||||
int* pC = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
|
||||
BufPtrs* pBuf = (BufPtrs*) clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0);
|
||||
size_t size = num_elements * sizeof(int);
|
||||
// int* pA = (int*) clSVMalloc(c, CL_MEM_READ_WRITE |
|
||||
// CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0); int* pB =
|
||||
// (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM,
|
||||
// sizeof(int)*num_elements, 0); int* pC = (int*) clSVMalloc(c,
|
||||
// CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM,
|
||||
// sizeof(int)*num_elements, 0);
|
||||
int* pA = (int*)clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
|
||||
int* pB = (int*)clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
|
||||
int* pC = (int*)clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
|
||||
BufPtrs* pBuf =
|
||||
(BufPtrs*)clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0);
|
||||
|
||||
bool failed = false;
|
||||
{
|
||||
clMemWrapper ba,bb,bc,bBuf;
|
||||
ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(BufPtrs), 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
|
||||
for(int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0;
|
||||
|
||||
pBuf->pA = pA;
|
||||
pBuf->pB = pB;
|
||||
pBuf->pC = pC;
|
||||
|
||||
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bBuf, pBuf, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
|
||||
|
||||
error = clSetKernelArgSVMPointer(k, 0, pBuf);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(BufPtrs), pBuf);
|
||||
test_error(error, "clSetKernelExecInfo failed");
|
||||
|
||||
size_t range = num_elements;
|
||||
error = clEnqueueNDRangeKernel(queues[0], k, 1, NULL, &range, NULL, 0, NULL, NULL);
|
||||
test_error(error,"clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clFinish(queues[0]);
|
||||
test_error(error, "clFinish failed.");
|
||||
|
||||
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
|
||||
for(int i = 0; i < num_elements; i++)
|
||||
bool failed = false;
|
||||
{
|
||||
if(pA[i] + pB[i] + pC[i] != 3)
|
||||
failed = true;
|
||||
clMemWrapper ba, bb, bc, bBuf;
|
||||
ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf,
|
||||
&error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
|
||||
0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
|
||||
0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
|
||||
0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
|
||||
0, sizeof(BufPtrs), 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
|
||||
for (int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0;
|
||||
|
||||
pBuf->pA = pA;
|
||||
pBuf->pB = pB;
|
||||
pBuf->pC = pC;
|
||||
|
||||
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bBuf, pBuf, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
|
||||
|
||||
error = clSetKernelArgSVMPointer(k, 0, pBuf);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS,
|
||||
sizeof(BufPtrs), pBuf);
|
||||
test_error(error, "clSetKernelExecInfo failed");
|
||||
|
||||
size_t range = num_elements;
|
||||
error = clEnqueueNDRangeKernel(queues[0], k, 1, NULL, &range, NULL, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clFinish(queues[0]);
|
||||
test_error(error, "clFinish failed.");
|
||||
|
||||
// Special case testing of unsetting previously set SVM pointers
|
||||
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, 0, NULL);
|
||||
test_error(error,
|
||||
"Unsetting previously set SVM pointers using "
|
||||
"clSetKernelExecInfo failed");
|
||||
|
||||
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
|
||||
0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
|
||||
0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
|
||||
0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
|
||||
for (int i = 0; i < num_elements; i++)
|
||||
{
|
||||
if (pA[i] + pB[i] + pC[i] != 3) failed = true;
|
||||
}
|
||||
|
||||
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
}
|
||||
|
||||
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
}
|
||||
error = clFinish(queues[0]);
|
||||
test_error(error, " clFinish failed.");
|
||||
|
||||
error = clFinish(queues[0]);
|
||||
test_error(error, " clFinish failed.");
|
||||
clSVMFree(c, pA);
|
||||
clSVMFree(c, pB);
|
||||
clSVMFree(c, pC);
|
||||
clSVMFree(c, pBuf);
|
||||
|
||||
clSVMFree(c, pA);
|
||||
clSVMFree(c, pB);
|
||||
clSVMFree(c, pC);
|
||||
clSVMFree(c, pBuf);
|
||||
if (failed) return -1;
|
||||
|
||||
if(failed) return -1;
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -16,92 +16,184 @@
|
||||
#include "testBase.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
REGISTER_TEST(negative_create_command_queue)
|
||||
{
|
||||
cl_command_queue_properties device_props = 0;
|
||||
cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
|
||||
sizeof(device_props), &device_props, NULL);
|
||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
|
||||
cl_int err = 0;
|
||||
clCreateCommandQueue(nullptr, device, 0, &err);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_CONTEXT,
|
||||
"clCreateCommandQueue should return CL_INVALID_CONTEXT when: \"context "
|
||||
"is not a valid context\" using a nullptr",
|
||||
TEST_FAIL);
|
||||
|
||||
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional property to
|
||||
// clCreateCommandQueue, CL_QUEUE_PROFILING_ENABLE is mandatory.
|
||||
const bool out_of_order_device_support =
|
||||
device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
|
||||
if (out_of_order_device_support)
|
||||
clCreateCommandQueue(context, nullptr, 0, &err);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_DEVICE,
|
||||
"clCreateCommandQueue should return CL_INVALID_DEVICE when: \"device "
|
||||
"is not a valid device\" using a nullptr",
|
||||
TEST_FAIL);
|
||||
|
||||
cl_device_id different_device = GetOpposingDevice(device);
|
||||
if (different_device && device != different_device)
|
||||
{
|
||||
// Early return as we can't check correct error is returned for
|
||||
// unsupported property.
|
||||
return TEST_PASS;
|
||||
clCreateCommandQueue(context, different_device, 0, &err);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_DEVICE,
|
||||
"clCreateCommandQueue should return CL_INVALID_DEVICE when: "
|
||||
"\"device is not associated with context\"",
|
||||
TEST_FAIL);
|
||||
}
|
||||
|
||||
// Try create a command queue with out-of-order property and check return
|
||||
// code
|
||||
cl_int test_error = CL_SUCCESS;
|
||||
clCommandQueueWrapper test_queue = clCreateCommandQueue(
|
||||
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &test_error);
|
||||
|
||||
cl_queue_properties invalid_property{ static_cast<cl_queue_properties>(
|
||||
-1) };
|
||||
clCreateCommandQueue(context, device, invalid_property, &err);
|
||||
test_failure_error_ret(
|
||||
test_error, CL_INVALID_QUEUE_PROPERTIES,
|
||||
"clCreateCommandQueue should return CL_INVALID_QUEUE_PROPERTIES if "
|
||||
"values specified in properties are valid but are not supported by "
|
||||
"the "
|
||||
"device.",
|
||||
err, CL_INVALID_VALUE,
|
||||
"clCreateCommandQueue should return CL_INVALID_VALUE when: \"values "
|
||||
"specified in properties are not valid\"",
|
||||
TEST_FAIL);
|
||||
|
||||
cl_command_queue_properties device_queue_properties = 0;
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
|
||||
sizeof(device_queue_properties),
|
||||
&device_queue_properties, nullptr);
|
||||
test_error(err, "clGetDeviceInfo");
|
||||
cl_command_queue_properties valid_properties[] = {
|
||||
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE
|
||||
};
|
||||
cl_queue_properties property{ 0 };
|
||||
bool missing_property = false;
|
||||
// Iterate through all possible properties to find one which isn't supported
|
||||
for (auto prop : valid_properties)
|
||||
{
|
||||
if ((device_queue_properties & prop) == 0)
|
||||
{
|
||||
missing_property = true;
|
||||
property = prop;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// This test can only run when a device does not support a property
|
||||
if (missing_property)
|
||||
{
|
||||
clCreateCommandQueue(context, device, property, &err);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_QUEUE_PROPERTIES,
|
||||
"clCreateCommandQueue should return CL_INVALID_QUEUE_PROPERTIES "
|
||||
"when: \"values specified in properties are valid but are not "
|
||||
"supported by the device\"",
|
||||
TEST_FAIL);
|
||||
}
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST_VERSION(negative_create_command_queue_with_properties,
|
||||
Version(2, 0))
|
||||
{
|
||||
cl_command_queue_properties device_props = 0;
|
||||
cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
|
||||
sizeof(device_props), &device_props, NULL);
|
||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
|
||||
cl_int err = 0;
|
||||
clCreateCommandQueueWithProperties(nullptr, device, nullptr, &err);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_CONTEXT,
|
||||
"clCreateCommandQueueWithProperties should return CL_INVALID_CONTEXT "
|
||||
"when: \"context is not a valid context\" using a nullptr",
|
||||
TEST_FAIL);
|
||||
|
||||
cl_command_queue_properties device_on_host_props = 0;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
|
||||
sizeof(device_on_host_props), &device_on_host_props,
|
||||
NULL);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo for CL_DEVICE_QUEUE_ON_HOST_PROPERTIES failed");
|
||||
clCreateCommandQueueWithProperties(context, nullptr, nullptr, &err);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_DEVICE,
|
||||
"clCreateCommandQueueWithProperties should return CL_INVALID_DEVICE "
|
||||
"when: \"device is not a valid device\" using a nullptr",
|
||||
TEST_FAIL);
|
||||
|
||||
if (device_on_host_props != device_props)
|
||||
cl_device_id different_device = GetOpposingDevice(device);
|
||||
if (different_device && device != different_device)
|
||||
{
|
||||
log_error(
|
||||
"ERROR: CL_DEVICE_QUEUE_PROPERTIES and "
|
||||
"CL_DEVICE_QUEUE_ON_HOST_PROPERTIES properties should match\n");
|
||||
return TEST_FAIL;
|
||||
clCreateCommandQueueWithProperties(context, different_device, nullptr,
|
||||
&err);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_DEVICE,
|
||||
"clCreateCommandQueueWithProperties should return "
|
||||
"CL_INVALID_DEVICE when: \"device is not associated with context\"",
|
||||
TEST_FAIL);
|
||||
}
|
||||
|
||||
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional host-queue
|
||||
// property to clCreateCommandQueueWithProperties,
|
||||
// CL_QUEUE_PROFILING_ENABLE is mandatory.
|
||||
const bool out_of_order_device_support =
|
||||
device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
|
||||
if (out_of_order_device_support)
|
||||
{
|
||||
// Early return as we can't check correct error is returned for
|
||||
// unsupported property.
|
||||
return TEST_PASS;
|
||||
}
|
||||
cl_queue_properties invalid_property{ static_cast<cl_queue_properties>(
|
||||
-1) };
|
||||
|
||||
// Try create a command queue with out-of-order property and check return
|
||||
// code
|
||||
cl_command_queue_properties queue_prop_def[] = {
|
||||
CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
|
||||
// Depending on the OpenCL Version, there can be up to 2 properties which
|
||||
// each take values, and the list should be terminated with a 0
|
||||
cl_queue_properties properties[] = { invalid_property, invalid_property, 0,
|
||||
0, 0 };
|
||||
clCreateCommandQueueWithProperties(context, device, properties, &err);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_VALUE,
|
||||
"clCreateCommandQueueWithProperties should return CL_INVALID_VALUE "
|
||||
"when: \"values specified in properties are not valid\"",
|
||||
TEST_FAIL);
|
||||
|
||||
cl_command_queue_properties device_queue_properties = 0;
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
|
||||
sizeof(device_queue_properties),
|
||||
&device_queue_properties, nullptr);
|
||||
test_error(err, "clGetDeviceInfo");
|
||||
cl_command_queue_properties valid_properties[] = {
|
||||
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE
|
||||
};
|
||||
|
||||
cl_int test_error = CL_SUCCESS;
|
||||
clCommandQueueWrapper test_queue = clCreateCommandQueueWithProperties(
|
||||
context, device, queue_prop_def, &test_error);
|
||||
|
||||
test_failure_error_ret(test_error, CL_INVALID_QUEUE_PROPERTIES,
|
||||
"clCreateCommandQueueWithProperties should "
|
||||
"return CL_INVALID_QUEUE_PROPERTIES if "
|
||||
"values specified in properties are valid but "
|
||||
"are not supported by the "
|
||||
"device.",
|
||||
TEST_FAIL);
|
||||
properties[0] = CL_QUEUE_PROPERTIES;
|
||||
bool missing_property = false;
|
||||
// Iterate through all possible properties to find one which isn't supported
|
||||
for (auto property : valid_properties)
|
||||
{
|
||||
if ((device_queue_properties & property) == 0)
|
||||
{
|
||||
missing_property = true;
|
||||
properties[1] = property;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (missing_property)
|
||||
{
|
||||
clCreateCommandQueueWithProperties(context, device, properties, &err);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_QUEUE_PROPERTIES,
|
||||
"clCreateCommandQueueWithProperties should return "
|
||||
"CL_INVALID_QUEUE_PROPERTIES when: \"values specified in "
|
||||
"properties are valid but are not supported by the device\"",
|
||||
TEST_FAIL);
|
||||
}
|
||||
else if (get_device_cl_version(device) >= Version(2, 0))
|
||||
{
|
||||
cl_uint max_size = -1;
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
|
||||
sizeof(max_size), &max_size, nullptr);
|
||||
test_error(err, "clGetDeviceInfo");
|
||||
if (max_size > 0 && max_size < CL_UINT_MAX)
|
||||
{
|
||||
properties[0] = CL_QUEUE_PROPERTIES;
|
||||
properties[1] =
|
||||
CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
|
||||
properties[2] = CL_QUEUE_SIZE;
|
||||
properties[3] = max_size + 1;
|
||||
clCreateCommandQueueWithProperties(context, device, properties,
|
||||
&err);
|
||||
if (err != CL_INVALID_VALUE && err != CL_INVALID_QUEUE_PROPERTIES)
|
||||
{
|
||||
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
|
||||
"clCreateCommandQueueWithProperties should return "
|
||||
"CL_INVALID_VALUE or CL_INVALID_QUEUE_PROPERTIES "
|
||||
"when: \"values specified in properties are not "
|
||||
"valid\" using a queue size greather than "
|
||||
"CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE",
|
||||
IGetErrorString(err),
|
||||
"CL_INVALID_VALUE or CL_INVALID_QUEUE_PROPERTIES",
|
||||
__FILE__, __LINE__);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
@@ -166,3 +258,236 @@ REGISTER_TEST(negative_create_command_queue_with_properties_khr)
|
||||
TEST_FAIL);
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST_VERSION(negative_set_default_device_command_queue, Version(2, 1))
|
||||
{
|
||||
cl_int err = 0;
|
||||
if (get_device_cl_version(device) >= Version(3, 0))
|
||||
{
|
||||
cl_device_device_enqueue_capabilities device_capabilities = 0;
|
||||
cl_int err = clGetDeviceInfo(
|
||||
device, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
|
||||
sizeof(device_capabilities), &device_capabilities, nullptr);
|
||||
test_error(err, "clGetDeviceInfo");
|
||||
if (((device_capabilities & CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT) == 0)
|
||||
&& ((device_capabilities & CL_DEVICE_QUEUE_SUPPORTED) == 1))
|
||||
{
|
||||
const cl_queue_properties properties[] = {
|
||||
CL_QUEUE_PROPERTIES,
|
||||
CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_ON_DEVICE
|
||||
| CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
|
||||
0
|
||||
};
|
||||
clCommandQueueWrapper cmd_queue =
|
||||
clCreateCommandQueueWithProperties(context, device, properties,
|
||||
&err);
|
||||
test_error(err, "clCreateCommandQueueWithProperties");
|
||||
err = clSetDefaultDeviceCommandQueue(context, device, cmd_queue);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_OPERATION,
|
||||
"clSetDefaultDeviceCommandQueue should return "
|
||||
"CL_INVALID_OPERATION when \"device does not support a "
|
||||
"replaceable default on-device queue\"",
|
||||
TEST_FAIL);
|
||||
}
|
||||
}
|
||||
|
||||
err = clSetDefaultDeviceCommandQueue(nullptr, device, queue);
|
||||
if (err != CL_INVALID_OPERATION && err != CL_INVALID_CONTEXT)
|
||||
{
|
||||
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
|
||||
"clSetDefaultDeviceCommandQueue should return "
|
||||
"CL_INVALID_OPERATION or CL_INVALID_CONTEXT when: \"context "
|
||||
"is not a valid context\" using a nullptr",
|
||||
IGetErrorString(err),
|
||||
"CL_INVALID_OPERATION or CL_INVALID_CONTEXT", __FILE__,
|
||||
__LINE__);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
err = clSetDefaultDeviceCommandQueue(context, nullptr, queue);
|
||||
if (err != CL_INVALID_OPERATION && err != CL_INVALID_DEVICE)
|
||||
{
|
||||
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
|
||||
"clSetDefaultDeviceCommandQueue should return "
|
||||
"CL_INVALID_OPERATION or CL_INVALID_DEVICE when: \"device "
|
||||
"is not a valid device\" using a nullptr",
|
||||
IGetErrorString(err),
|
||||
"CL_INVALID_OPERATION or CL_INVALID_DEVICE", __FILE__,
|
||||
__LINE__);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
cl_device_id different_device = GetOpposingDevice(device);
|
||||
if (different_device && device != different_device)
|
||||
{
|
||||
err = clSetDefaultDeviceCommandQueue(context, different_device, queue);
|
||||
if (err != CL_INVALID_OPERATION && err != CL_INVALID_DEVICE)
|
||||
{
|
||||
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
|
||||
"clSetDefaultDeviceCommandQueue should return "
|
||||
"CL_INVALID_OPERATION or CL_INVALID_DEVICE when: "
|
||||
"\"device is not associated with context\"",
|
||||
IGetErrorString(err),
|
||||
"CL_INVALID_OPERATION or CL_INVALID_DEVICE", __FILE__,
|
||||
__LINE__);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
err = clSetDefaultDeviceCommandQueue(context, device, nullptr);
|
||||
if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
|
||||
{
|
||||
log_error(
|
||||
"ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
|
||||
"clSetDefaultDeviceCommandQueue should return CL_INVALID_OPERATION "
|
||||
"or CL_INVALID_COMMAND_QUEUE when: \"command_queue is not a valid "
|
||||
"command-queue for device\" using a nullptr",
|
||||
IGetErrorString(err),
|
||||
"CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE", __FILE__,
|
||||
__LINE__);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
{
|
||||
constexpr cl_queue_properties props[] = {
|
||||
CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
|
||||
};
|
||||
clCommandQueueWrapper not_on_device_queue =
|
||||
clCreateCommandQueueWithProperties(context, device, props, &err);
|
||||
test_error_fail(err, "clCreateCommandQueueWithProperties failed");
|
||||
err = clSetDefaultDeviceCommandQueue(context, device,
|
||||
not_on_device_queue);
|
||||
if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
|
||||
{
|
||||
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
|
||||
"clSetDefaultDeviceCommandQueue should return "
|
||||
"CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: "
|
||||
"\"command_queue is not a valid command-queue for "
|
||||
"device\" using a command queue that is not on device",
|
||||
IGetErrorString(err),
|
||||
"CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE",
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(negative_retain_command_queue)
|
||||
{
|
||||
cl_int err = clRetainCommandQueue(nullptr);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_COMMAND_QUEUE,
|
||||
"clRetainCommandQueue should return CL_INVALID_COMMAND_QUEUE when: "
|
||||
"\"command_queue is not a valid command-queue\" using a nullptr",
|
||||
TEST_FAIL);
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(negative_release_command_queue)
|
||||
{
|
||||
cl_int err = clReleaseCommandQueue(nullptr);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_COMMAND_QUEUE,
|
||||
"clReleaseCommandQueue should return CL_INVALID_COMMAND_QUEUE when: "
|
||||
"\"command_queue is not a valid command-queue\" using a nullptr",
|
||||
TEST_FAIL);
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
static bool device_supports_on_device_queue(cl_device_id deviceID)
|
||||
{
|
||||
cl_command_queue_properties device_queue_properties = 0;
|
||||
if (get_device_cl_version(deviceID) >= Version(2, 0))
|
||||
{
|
||||
cl_int err = clGetDeviceInfo(
|
||||
deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
|
||||
sizeof(device_queue_properties), &device_queue_properties, nullptr);
|
||||
test_error(err, "clGetDeviceInfo");
|
||||
return (device_queue_properties
|
||||
& CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
REGISTER_TEST(negative_get_command_queue_info)
|
||||
{
|
||||
cl_int err =
|
||||
clGetCommandQueueInfo(nullptr, CL_QUEUE_CONTEXT, 0, nullptr, nullptr);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_COMMAND_QUEUE,
|
||||
"clGetCommandQueueInfo should return CL_INVALID_COMMAND_QUEUE when: "
|
||||
"\"command_queue is not a valid command-queue\" using a nullptr",
|
||||
TEST_FAIL);
|
||||
|
||||
if (device_supports_on_device_queue(device))
|
||||
{
|
||||
const cl_queue_properties properties[] = {
|
||||
CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
|
||||
};
|
||||
cl_int err = CL_INVALID_VALUE;
|
||||
clCommandQueueWrapper cmd_queue = clCreateCommandQueueWithProperties(
|
||||
context, device, properties, &err);
|
||||
test_error(err, "clCreateCommandQueueWithProperties");
|
||||
cl_uint queue_size = -1;
|
||||
err = clGetCommandQueueInfo(cmd_queue, CL_QUEUE_SIZE,
|
||||
sizeof(queue_size), &queue_size, nullptr);
|
||||
test_failure_error_ret(err, CL_INVALID_COMMAND_QUEUE,
|
||||
"clGetCommandQueueInfo should return "
|
||||
"CL_INVALID_COMMAND_QUEUE when: \"command_queue "
|
||||
"is not a valid command-queue for param_name\"",
|
||||
TEST_FAIL);
|
||||
}
|
||||
|
||||
constexpr cl_command_queue_info invalid_param = -1;
|
||||
err = clGetCommandQueueInfo(queue, invalid_param, 0, nullptr, nullptr);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_VALUE,
|
||||
"clGetCommandQueueInfo should return CL_INVALID_VALUE when: "
|
||||
"\"param_name is not one of the supported values\"",
|
||||
TEST_FAIL);
|
||||
|
||||
|
||||
cl_uint ref_count = -1;
|
||||
err = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, 0, &ref_count,
|
||||
nullptr);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_VALUE,
|
||||
"clGetCommandQueueInfo should return CL_INVALID_VALUE when: \"size in "
|
||||
"bytes specified by param_value_size is < size of return type and "
|
||||
"param_value is not a NULL value\"",
|
||||
TEST_FAIL);
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST_VERSION(negative_set_command_queue_property, Version(1, 0))
|
||||
{
|
||||
auto version = get_device_cl_version(device);
|
||||
if (version >= Version(1, 1))
|
||||
{
|
||||
// Implementations are allowed to return an error for
|
||||
// non-OpenCL 1.0 devices. In which case, skip the test.
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
cl_queue_properties property{ CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE };
|
||||
cl_int err = clSetCommandQueueProperty(nullptr, property, CL_TRUE, nullptr);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_COMMAND_QUEUE,
|
||||
"clSetCommandQueueProperty should return CL_INVALID_COMMAND_QUEUE "
|
||||
"when: \"command_queue is not a valid command-queue\" using a nullptr",
|
||||
TEST_FAIL);
|
||||
|
||||
property = -1;
|
||||
err = clSetCommandQueueProperty(queue, property, CL_TRUE, nullptr);
|
||||
test_failure_error_ret(
|
||||
err, CL_INVALID_VALUE,
|
||||
"clSetCommandQueueProperty should return CL_INVALID_VALUE when: "
|
||||
"\"values specified in properties are not valid\"",
|
||||
TEST_FAIL);
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
@@ -336,3 +336,161 @@ REGISTER_TEST(kernel_attributes)
|
||||
}
|
||||
return success ? TEST_PASS : TEST_FAIL;
|
||||
}
|
||||
|
||||
REGISTER_TEST(null_required_work_group_size)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
clGetKernelSuggestedLocalWorkSizeKHR_fn
|
||||
clGetKernelSuggestedLocalWorkSizeKHR = nullptr;
|
||||
if (is_extension_available(device, "cl_khr_suggested_local_work_size"))
|
||||
{
|
||||
cl_platform_id platform = nullptr;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
|
||||
&platform, NULL);
|
||||
test_error(error, "clGetDeviceInfo for platform failed");
|
||||
|
||||
clGetKernelSuggestedLocalWorkSizeKHR =
|
||||
(clGetKernelSuggestedLocalWorkSizeKHR_fn)
|
||||
clGetExtensionFunctionAddressForPlatform(
|
||||
platform, "clGetKernelSuggestedLocalWorkSizeKHR");
|
||||
test_assert_error(clGetKernelSuggestedLocalWorkSizeKHR != nullptr,
|
||||
"Couldn't get function pointer for "
|
||||
"clGetKernelSuggestedLocalWorkSizeKHR");
|
||||
}
|
||||
|
||||
cl_uint device_max_dim = 0;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
|
||||
sizeof(device_max_dim), &device_max_dim, nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
|
||||
test_assert_error(device_max_dim >= 3,
|
||||
"CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS must be at least 3!");
|
||||
|
||||
std::vector<size_t> device_max_work_item_sizes(device_max_dim);
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
||||
sizeof(size_t) * device_max_dim,
|
||||
device_max_work_item_sizes.data(), nullptr);
|
||||
|
||||
size_t device_max_work_group_size = 0;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
|
||||
sizeof(device_max_work_group_size),
|
||||
&device_max_work_group_size, nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed");
|
||||
|
||||
clMemWrapper dst;
|
||||
dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
|
||||
nullptr, &error);
|
||||
|
||||
struct KernelAttribInfo
|
||||
{
|
||||
std::string str;
|
||||
cl_uint max_dim;
|
||||
};
|
||||
|
||||
std::vector<KernelAttribInfo> attribs;
|
||||
attribs.push_back({ "__attribute__((reqd_work_group_size(2,1,1)))", 1 });
|
||||
attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,1)))", 2 });
|
||||
attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,4)))", 3 });
|
||||
|
||||
const std::string body_str = R"(
|
||||
__kernel void wg_size(__global int* dst)
|
||||
{
|
||||
if (get_global_id(0) == 0 &&
|
||||
get_global_id(1) == 0 &&
|
||||
get_global_id(2) == 0) {
|
||||
dst[0] = get_local_size(0);
|
||||
dst[1] = get_local_size(1);
|
||||
dst[2] = get_local_size(2);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
for (auto& attrib : attribs)
|
||||
{
|
||||
const std::string source_str = attrib.str + body_str;
|
||||
const char* source = source_str.c_str();
|
||||
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
error = create_single_kernel_helper(context, &program, &kernel, 1,
|
||||
&source, "wg_size");
|
||||
test_error(error, "Unable to create test kernel");
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
for (cl_uint work_dim = 1; work_dim <= attrib.max_dim; work_dim++)
|
||||
{
|
||||
const cl_int expected[3] = { 2, work_dim >= 2 ? 3 : 1,
|
||||
work_dim >= 3 ? 4 : 1 };
|
||||
const size_t test_work_group_size =
|
||||
expected[0] * expected[1] * expected[2];
|
||||
if ((size_t)expected[0] > device_max_work_item_sizes[0]
|
||||
|| (size_t)expected[1] > device_max_work_item_sizes[1]
|
||||
|| (size_t)expected[2] > device_max_work_item_sizes[2]
|
||||
|| test_work_group_size > device_max_work_group_size)
|
||||
{
|
||||
log_info("Skipping test for work_dim = %u: required work group "
|
||||
"size (%i, %i, %i) (total %zu) exceeds device max "
|
||||
"work group size (%zu, %zu, %zu) (total %zu)\n",
|
||||
work_dim, expected[0], expected[1], expected[2],
|
||||
test_work_group_size, device_max_work_item_sizes[0],
|
||||
device_max_work_item_sizes[1],
|
||||
device_max_work_item_sizes[2],
|
||||
device_max_work_group_size);
|
||||
continue;
|
||||
}
|
||||
|
||||
const cl_int zero = 0;
|
||||
error = clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
|
||||
sizeof(expected), 0, nullptr, nullptr);
|
||||
|
||||
const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr,
|
||||
global_work_size, nullptr, 0,
|
||||
nullptr, nullptr);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
cl_int results[3] = { -1, -1, -1 };
|
||||
error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(results),
|
||||
results, 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
if (results[0] != expected[0] || results[1] != expected[1]
|
||||
|| results[2] != expected[2])
|
||||
{
|
||||
log_error("Executed local size mismatch with work_dim = %u: "
|
||||
"Expected (%d,%d,%d) got (%d,%d,%d)\n",
|
||||
work_dim, expected[0], expected[1], expected[2],
|
||||
results[0], results[1], results[2]);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
if (clGetKernelSuggestedLocalWorkSizeKHR != nullptr)
|
||||
{
|
||||
size_t suggested[3] = { 1, 1, 1 };
|
||||
error = clGetKernelSuggestedLocalWorkSizeKHR(
|
||||
queue, kernel, work_dim, nullptr, global_work_size,
|
||||
suggested);
|
||||
test_error(error,
|
||||
"clGetKernelSuggestedLocalWorkSizeKHR failed");
|
||||
|
||||
if ((cl_int)suggested[0] != expected[0]
|
||||
|| (cl_int)suggested[1] != expected[1]
|
||||
|| (cl_int)suggested[2] != expected[2])
|
||||
{
|
||||
log_error("Suggested local size mismatch with work_dim = "
|
||||
"%u: Expected (%d,%d,%d) got (%d,%d,%d)\n",
|
||||
work_dim, expected[0], expected[1], expected[2],
|
||||
(cl_int)suggested[0], (cl_int)suggested[1],
|
||||
(cl_int)suggested[2]);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
#include "testBase.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/conversions.h"
|
||||
#include "harness/stringHelpers.h"
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
const char *sample_single_test_kernel[] = {
|
||||
@@ -87,6 +89,16 @@ const char *sample_two_kernel_program[] = {
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
const char *sample_sampler_size_test_kernel = R"(
|
||||
__kernel void sampler_size_test(sampler_t sampler, __read_only image2d_t src, __global float4 *dst)
|
||||
{
|
||||
int tid = get_global_id(0);
|
||||
int2 coord = (int2)(get_global_id(0), get_global_id(1));
|
||||
float4 data = read_imagef(src, sampler, coord);
|
||||
dst[tid] = data;
|
||||
}
|
||||
)";
|
||||
|
||||
const char *sample_mem_obj_size_test_kernel = R"(
|
||||
__kernel void mem_obj_size_test(__global int *src, __global int *dst)
|
||||
{
|
||||
@@ -117,6 +129,14 @@ const char *sample_write_only_image_test_kernel = R"(
|
||||
}
|
||||
)";
|
||||
|
||||
const char *sample_arg_size_test_kernel = R"(
|
||||
%s
|
||||
__kernel void arg_size_test(%s src, __global %s *dst)
|
||||
{
|
||||
dst[0]=src;
|
||||
}
|
||||
)";
|
||||
|
||||
REGISTER_TEST(get_kernel_info)
|
||||
{
|
||||
int error;
|
||||
@@ -734,6 +754,148 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(negative_invalid_arg_sampler)
|
||||
{
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
|
||||
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper sampler_arg_kernel;
|
||||
|
||||
// Setup the test
|
||||
error =
|
||||
create_single_kernel_helper(context, &program, nullptr, 1,
|
||||
&sample_sampler_size_test_kernel, nullptr);
|
||||
test_error(error, "Unable to build test program");
|
||||
|
||||
sampler_arg_kernel = clCreateKernel(program, "sampler_size_test", &error);
|
||||
test_error(error,
|
||||
"Unable to get sampler_size_test kernel for built program");
|
||||
|
||||
// Run the test - CL_INVALID_SAMPLER
|
||||
error = clSetKernelArg(sampler_arg_kernel, 0, sizeof(cl_sampler), nullptr);
|
||||
test_failure_error_ret(
|
||||
error, CL_INVALID_SAMPLER,
|
||||
"clSetKernelArg is supposed to fail with CL_INVALID_SAMPLER when "
|
||||
"argument is declared to be of type sampler_t and the specified "
|
||||
"arg_value is not a valid sampler object",
|
||||
TEST_FAIL);
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(negative_invalid_arg_sampler_size)
|
||||
{
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
|
||||
|
||||
cl_int error = CL_SUCCESS;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper sampler_arg_kernel;
|
||||
|
||||
// Setup the test
|
||||
error =
|
||||
create_single_kernel_helper(context, &program, nullptr, 1,
|
||||
&sample_sampler_size_test_kernel, nullptr);
|
||||
test_error(error, "Unable to build test program");
|
||||
|
||||
sampler_arg_kernel = clCreateKernel(program, "sampler_size_test", &error);
|
||||
test_error(error,
|
||||
"Unable to get sampler_size_test kernel for built program");
|
||||
|
||||
clSamplerWrapper sampler = clCreateSampler(
|
||||
context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
|
||||
test_error(error, "Unable to create sampler");
|
||||
|
||||
// Run the test - CL_INVALID_ARG_SIZE
|
||||
error =
|
||||
clSetKernelArg(sampler_arg_kernel, 0, sizeof(cl_sampler) * 2, &sampler);
|
||||
test_failure_error_ret(
|
||||
error, CL_INVALID_ARG_SIZE,
|
||||
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
|
||||
"argument is a sampler object and arg_size > sizeof(cl_sampler)",
|
||||
TEST_FAIL);
|
||||
|
||||
error =
|
||||
clSetKernelArg(sampler_arg_kernel, 0, sizeof(cl_sampler) / 2, &sampler);
|
||||
test_failure_error_ret(
|
||||
error, CL_INVALID_ARG_SIZE,
|
||||
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
|
||||
"argument is a sampler object and arg_size < sizeof(cl_sampler)",
|
||||
TEST_FAIL);
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(negative_invalid_arg_size)
|
||||
{
|
||||
std::vector<ExplicitType> exp_types = { kChar, kUChar, kShort, kUShort,
|
||||
kInt, kUInt, kLong, kULong,
|
||||
kFloat, kHalf, kDouble };
|
||||
|
||||
bool fp16_supported = is_extension_available(device, "cl_khr_fp16");
|
||||
bool fp64_supported = is_extension_available(device, "cl_khr_fp64");
|
||||
|
||||
for (unsigned int type_num = 0; type_num < exp_types.size(); type_num++)
|
||||
{
|
||||
auto type = exp_types[type_num];
|
||||
|
||||
if ((type == kLong || type == kULong) && !gHasLong)
|
||||
continue;
|
||||
else if (type == kDouble && !fp64_supported)
|
||||
continue;
|
||||
else if (type == kHalf && !fp16_supported)
|
||||
continue;
|
||||
else if (strchr(get_explicit_type_name(type), ' ') != 0)
|
||||
continue;
|
||||
|
||||
std::array<unsigned int, 5> sizes = { 1, 2, 4, 8, 16 };
|
||||
std::vector<char> buf(sizeof(cl_ulong16), 0);
|
||||
for (unsigned i = 0; i < sizes.size(); i++)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
|
||||
size_t destStride = get_explicit_type_size(type) * sizes[i];
|
||||
|
||||
std::ostringstream vecNameStr;
|
||||
vecNameStr << get_explicit_type_name(type);
|
||||
if (sizes[i] != 1) vecNameStr << sizes[i];
|
||||
|
||||
std::string ext_str;
|
||||
if (type == kDouble)
|
||||
ext_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||
|
||||
if (type == kHalf)
|
||||
ext_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
|
||||
|
||||
auto vt_name = vecNameStr.str();
|
||||
std::string program_source =
|
||||
str_sprintf(std::string(sample_arg_size_test_kernel),
|
||||
ext_str.c_str(), vt_name.c_str(), vt_name.c_str());
|
||||
|
||||
const char *ptr = program_source.c_str();
|
||||
cl_int error = create_single_kernel_helper(
|
||||
context, &program, &kernel, 1, &ptr, "arg_size_test");
|
||||
test_error(error, "Unable to build test program!");
|
||||
|
||||
// Run the test
|
||||
size_t reduced = destStride / 2;
|
||||
error = clSetKernelArg(kernel, 0, reduced, buf.data());
|
||||
if (error != CL_INVALID_ARG_SIZE)
|
||||
{
|
||||
std::stringstream sstr;
|
||||
sstr << "clSetKernelArg is supposed to fail "
|
||||
"with CL_INVALID_ARG_SIZE with type "
|
||||
<< vecNameStr.str() << " and sizeof " << reduced
|
||||
<< std::endl;
|
||||
log_error("%s", sstr.str().c_str());
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(negative_invalid_arg_mem_obj)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
@@ -255,18 +255,12 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
|
||||
}
|
||||
|
||||
// Initialize our storage
|
||||
std::unique_ptr<cl_int[]> l_bin_counts(new cl_int[number_of_bins]);
|
||||
if (!l_bin_counts)
|
||||
{
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for "
|
||||
"bin_counters.\n");
|
||||
return -1;
|
||||
}
|
||||
std::vector<cl_int> l_bin_counts(number_of_bins);
|
||||
int i;
|
||||
for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
|
||||
err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
|
||||
sizeof(cl_int) * number_of_bins,
|
||||
l_bin_counts.get(), 0, NULL, NULL);
|
||||
l_bin_counts.data(), 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("add_index_bin_test FAILED to set initial values for "
|
||||
@@ -275,19 +269,12 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::unique_ptr<cl_int[]> values(
|
||||
new cl_int[number_of_bins * max_counts_per_bin]);
|
||||
if (!values)
|
||||
{
|
||||
log_error(
|
||||
"add_index_bin_test FAILED to allocate initial values for bins.\n");
|
||||
return -1;
|
||||
}
|
||||
std::vector<cl_int> values(number_of_bins * max_counts_per_bin);
|
||||
for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
|
||||
err = clEnqueueWriteBuffer(queue, bins, true, 0,
|
||||
sizeof(cl_int) * number_of_bins
|
||||
* max_counts_per_bin,
|
||||
values.get(), 0, NULL, NULL);
|
||||
values.data(), 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error(
|
||||
@@ -296,13 +283,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::unique_ptr<cl_int[]> l_bin_assignments(new cl_int[number_of_items]);
|
||||
if (!l_bin_assignments)
|
||||
{
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for "
|
||||
"l_bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
std::vector<cl_int> l_bin_assignments(number_of_items);
|
||||
for (i = 0; i < number_of_items; i++)
|
||||
{
|
||||
int bin = random_in_range(0, number_of_bins - 1, d);
|
||||
@@ -326,7 +307,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
|
||||
sizeof(cl_int) * number_of_items,
|
||||
l_bin_assignments.get(), 0, NULL, NULL);
|
||||
l_bin_assignments.data(), 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("add_index_bin_test FAILED to set initial values for "
|
||||
@@ -355,34 +336,22 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::unique_ptr<cl_int[]> final_bin_assignments(
|
||||
new cl_int[number_of_bins * max_counts_per_bin]);
|
||||
if (!final_bin_assignments)
|
||||
{
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for "
|
||||
"final_bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
std::vector<cl_int> final_bin_assignments(number_of_bins
|
||||
* max_counts_per_bin);
|
||||
err = clEnqueueReadBuffer(queue, bins, true, 0,
|
||||
sizeof(cl_int) * number_of_bins
|
||||
* max_counts_per_bin,
|
||||
final_bin_assignments.get(), 0, NULL, NULL);
|
||||
final_bin_assignments.data(), 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::unique_ptr<cl_int[]> final_bin_counts(new cl_int[number_of_bins]);
|
||||
if (!final_bin_counts)
|
||||
{
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for "
|
||||
"final_bin_counts.\n");
|
||||
return -1;
|
||||
}
|
||||
std::vector<cl_int> final_bin_counts(number_of_bins);
|
||||
err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
|
||||
sizeof(cl_int) * number_of_bins,
|
||||
final_bin_counts.get(), 0, NULL, NULL);
|
||||
final_bin_counts.data(), 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
|
||||
|
||||
@@ -193,15 +193,38 @@ int AtomicTypeInfo::IsSupported(cl_device_id device)
|
||||
template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;}
|
||||
template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
|
||||
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
|
||||
template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() {return 0;}
|
||||
template<> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue() {return CL_FLT_MIN;}
|
||||
template<> cl_double AtomicTypeExtendedInfo<cl_double>::MinValue() {return CL_DBL_MIN;}
|
||||
template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; }
|
||||
template <> cl_half AtomicTypeExtendedInfo<cl_half>::MinValue()
|
||||
{
|
||||
return cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode);
|
||||
}
|
||||
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
|
||||
{
|
||||
return -CL_FLT_MAX;
|
||||
}
|
||||
template <> cl_double AtomicTypeExtendedInfo<cl_double>::MinValue()
|
||||
{
|
||||
return -CL_DBL_MAX;
|
||||
}
|
||||
|
||||
template<> cl_int AtomicTypeExtendedInfo<cl_int>::MaxValue() {return CL_INT_MAX;}
|
||||
template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue() {return CL_UINT_MAX;}
|
||||
template <> cl_int AtomicTypeExtendedInfo<cl_int>::MaxValue()
|
||||
{
|
||||
return CL_INT_MAX;
|
||||
}
|
||||
template <> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue()
|
||||
{
|
||||
return CL_UINT_MAX;
|
||||
}
|
||||
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
|
||||
template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
|
||||
template<> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue() {return CL_FLT_MAX;}
|
||||
template <> cl_half AtomicTypeExtendedInfo<cl_half>::MaxValue()
|
||||
{
|
||||
return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode);
|
||||
}
|
||||
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
|
||||
{
|
||||
return CL_FLT_MAX;
|
||||
}
|
||||
template<> cl_double AtomicTypeExtendedInfo<cl_double>::MaxValue() {return CL_DBL_MAX;}
|
||||
|
||||
cl_int getSupportedMemoryOrdersAndScopes(
|
||||
|
||||
@@ -79,6 +79,7 @@ extern cl_device_atomic_capabilities gAtomicMemCap,
|
||||
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||
extern bool gFloatAtomicsSupported;
|
||||
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
|
||||
extern cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps;
|
||||
extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
|
||||
|
||||
extern const char *
|
||||
@@ -893,15 +894,16 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
|
||||
header += std::string("__global volatile ") + aTypeName + " destMemory["
|
||||
+ ss.str() + "] = {\n";
|
||||
ss.str("");
|
||||
|
||||
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
|
||||
== TYPE_ATOMIC_FLOAT)
|
||||
ss << std::setprecision(10) << _startValue;
|
||||
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
|
||||
== TYPE_ATOMIC_HALF)
|
||||
ss << static_cast<HostDataType>(
|
||||
cl_half_to_float(static_cast<cl_half>(_startValue)));
|
||||
ss << cl_half_to_float(static_cast<cl_half>(_startValue));
|
||||
else
|
||||
ss << _startValue;
|
||||
|
||||
for (cl_uint i = 0; i < maxNumDestItems; i++)
|
||||
{
|
||||
if (aTypeName == "atomic_flag")
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
|
||||
#include "harness/testHarness.h"
|
||||
#include <mutex>
|
||||
|
||||
#include "CL/cl_half.h"
|
||||
|
||||
#ifdef WIN32
|
||||
@@ -99,7 +98,19 @@ template <typename AtomicType, typename CorrespondingType>
|
||||
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
|
||||
TExplicitMemoryOrderType order)
|
||||
{
|
||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
|
||||
{
|
||||
static std::mutex mx;
|
||||
std::lock_guard<std::mutex> lock(mx);
|
||||
CorrespondingType old_value = *a;
|
||||
*a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)),
|
||||
gHalfRoundingMode);
|
||||
return old_value;
|
||||
}
|
||||
else if constexpr (
|
||||
std::is_same_v<
|
||||
AtomicType,
|
||||
HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
|
||||
{
|
||||
static std::mutex mx;
|
||||
std::lock_guard<std::mutex> lock(mx);
|
||||
@@ -109,7 +120,7 @@ CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingTyp
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
||||
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
|
||||
return InterlockedExchangeAdd(a, c);
|
||||
#elif defined(__GNUC__)
|
||||
return __sync_fetch_and_add(a, c);
|
||||
@@ -124,7 +135,15 @@ template <typename AtomicType, typename CorrespondingType>
|
||||
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
|
||||
TExplicitMemoryOrderType order)
|
||||
{
|
||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
|
||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
||||
{
|
||||
static std::mutex mx;
|
||||
std::lock_guard<std::mutex> lock(mx);
|
||||
CorrespondingType old_value = *a;
|
||||
*a -= c;
|
||||
return old_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
|
||||
{
|
||||
static std::mutex mx;
|
||||
std::lock_guard<std::mutex> lock(mx);
|
||||
@@ -173,14 +192,30 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
|
||||
TExplicitMemoryOrderType order_failure)
|
||||
{
|
||||
CorrespondingType tmp;
|
||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
|
||||
{
|
||||
static std::mutex mtx;
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
tmp = *reinterpret_cast<volatile cl_half *>(a);
|
||||
|
||||
if (cl_half_to_float(tmp) == cl_half_to_float(*expected))
|
||||
{
|
||||
*reinterpret_cast<volatile cl_half *>(a) = desired;
|
||||
return true;
|
||||
}
|
||||
*expected = tmp;
|
||||
}
|
||||
else if constexpr (
|
||||
std::is_same_v<
|
||||
AtomicType,
|
||||
HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
||||
{
|
||||
static std::mutex mtx;
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
tmp = *reinterpret_cast<volatile float *>(a);
|
||||
if (tmp == *expected)
|
||||
{
|
||||
*reinterpret_cast<volatile float *>(a) = desired;
|
||||
*a = desired;
|
||||
return true;
|
||||
}
|
||||
*expected = tmp;
|
||||
@@ -188,7 +223,6 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
|
||||
else
|
||||
{
|
||||
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
|
||||
|
||||
tmp = InterlockedCompareExchange(a, desired, *expected);
|
||||
#elif defined(__GNUC__)
|
||||
tmp = __sync_val_compare_and_swap(a, *expected, desired);
|
||||
|
||||
@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
|
||||
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
|
||||
bool gFloatAtomicsSupported = false;
|
||||
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
|
||||
cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0;
|
||||
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
|
||||
|
||||
test_status InitCL(cl_device_id device) {
|
||||
@@ -134,6 +135,14 @@ test_status InitCL(cl_device_id device) {
|
||||
{
|
||||
gFloatAtomicsSupported = true;
|
||||
|
||||
if (is_extension_available(device, "cl_khr_fp64"))
|
||||
{
|
||||
cl_int error = clGetDeviceInfo(
|
||||
device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||
sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr);
|
||||
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
|
||||
}
|
||||
|
||||
cl_int error = clGetDeviceInfo(
|
||||
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
|
||||
|
||||
@@ -417,7 +417,7 @@ public:
|
||||
correct = true;
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
{
|
||||
if constexpr (std::is_same<HostDataType, cl_half>::value)
|
||||
if constexpr (std::is_same_v<HostDataType, cl_half>)
|
||||
{
|
||||
HostDataType test = cl_half_from_float(static_cast<float>(i),
|
||||
gHalfRoundingMode);
|
||||
@@ -1163,13 +1163,30 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak)
|
||||
num_elements, true);
|
||||
}
|
||||
|
||||
template <typename T> double kahan_sum(const std::vector<T> &nums)
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
template <> double kahan_sum<double>(const std::vector<double> &nums)
|
||||
{
|
||||
double sum = 0.0;
|
||||
double compensation = 0.0;
|
||||
for (double num : nums)
|
||||
{
|
||||
double y = num - compensation;
|
||||
double t = sum + y;
|
||||
compensation = (t - sum) - y;
|
||||
sum = t;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
template <typename HostAtomicType, typename HostDataType>
|
||||
class CBasicTestFetchAdd
|
||||
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
||||
|
||||
double min_range;
|
||||
double max_range;
|
||||
double max_error_fp32;
|
||||
double max_error;
|
||||
std::vector<HostDataType> ref_vals;
|
||||
|
||||
public:
|
||||
@@ -1182,26 +1199,98 @@ public:
|
||||
CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
|
||||
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
||||
useSVM),
|
||||
min_range(-999.0), max_range(999.0), max_error_fp32(0.0)
|
||||
min_range(-999.0), max_range(999.0), max_error(0.0)
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
StartValue(0.f);
|
||||
StartValue((HostDataType)0.0);
|
||||
CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::OldValueCheck(false);
|
||||
}
|
||||
}
|
||||
template <typename Iterator> float accum_halfs(Iterator begin, Iterator end)
|
||||
{
|
||||
cl_half sum = 0;
|
||||
for (auto it = begin; it != end; ++it)
|
||||
{
|
||||
sum = cl_half_from_float(cl_half_to_float(sum)
|
||||
+ cl_half_to_float(*it),
|
||||
gHalfRoundingMode);
|
||||
}
|
||||
return cl_half_to_float(sum);
|
||||
}
|
||||
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||
MTdata d) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (threadCount > ref_vals.size())
|
||||
{
|
||||
ref_vals.resize(threadCount);
|
||||
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
ref_vals[i] = get_random_float(min_range, max_range, d);
|
||||
ref_vals[i] = cl_half_from_float(
|
||||
get_random_float(min_range, max_range, d),
|
||||
gHalfRoundingMode);
|
||||
|
||||
memcpy(startRefValues, ref_vals.data(),
|
||||
sizeof(HostDataType) * ref_vals.size());
|
||||
|
||||
// Estimate highest possible summation error for given set.
|
||||
std::vector<float> sums;
|
||||
std::sort(ref_vals.begin(), ref_vals.end(),
|
||||
[](cl_half a, cl_half b) {
|
||||
return cl_half_to_float(a) < cl_half_to_float(b);
|
||||
});
|
||||
|
||||
sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
|
||||
sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
|
||||
|
||||
std::sort(ref_vals.begin(), ref_vals.end(),
|
||||
[](cl_half a, cl_half b) {
|
||||
return std::abs(cl_half_to_float(a))
|
||||
< std::abs(cl_half_to_float(b));
|
||||
});
|
||||
|
||||
float precise = 0.f;
|
||||
for (auto elem : ref_vals) precise += cl_half_to_float(elem);
|
||||
sums.push_back(precise);
|
||||
|
||||
sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
|
||||
sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
|
||||
|
||||
std::sort(sums.begin(), sums.end());
|
||||
max_error = std::abs(sums.front() - sums.back());
|
||||
|
||||
// restore unsorted order
|
||||
memcpy(ref_vals.data(), startRefValues,
|
||||
sizeof(HostDataType) * ref_vals.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(startRefValues, ref_vals.data(),
|
||||
sizeof(HostDataType) * threadCount);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
if (threadCount > ref_vals.size())
|
||||
{
|
||||
ref_vals.resize(threadCount);
|
||||
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
ref_vals[i] =
|
||||
get_random_double(min_range, max_range, d);
|
||||
else
|
||||
ref_vals[i] = get_random_float(min_range, max_range, d);
|
||||
|
||||
memcpy(startRefValues, ref_vals.data(),
|
||||
sizeof(HostDataType) * ref_vals.size());
|
||||
@@ -1216,12 +1305,17 @@ public:
|
||||
sums.push_back(
|
||||
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
|
||||
|
||||
std::sort(
|
||||
ref_vals.begin(), ref_vals.end(),
|
||||
[](float a, float b) { return std::abs(a) < std::abs(b); });
|
||||
std::sort(ref_vals.begin(), ref_vals.end(),
|
||||
[](HostDataType a, HostDataType b) {
|
||||
return std::abs(a) < std::abs(b);
|
||||
});
|
||||
|
||||
double precise = 0.0;
|
||||
for (auto elem : ref_vals) precise += double(elem);
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
precise = kahan_sum(ref_vals);
|
||||
else
|
||||
for (auto elem : ref_vals) precise += double(elem);
|
||||
|
||||
sums.push_back(precise);
|
||||
|
||||
sums.push_back(
|
||||
@@ -1231,8 +1325,7 @@ public:
|
||||
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
|
||||
|
||||
std::sort(sums.begin(), sums.end());
|
||||
max_error_fp32 =
|
||||
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
|
||||
max_error = std::abs(sums.front() - sums.back());
|
||||
|
||||
// restore unsorted order
|
||||
memcpy(ref_vals.data(), startRefValues,
|
||||
@@ -1252,7 +1345,10 @@ public:
|
||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
|
||||
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
|
||||
@@ -1286,7 +1382,10 @@ public:
|
||||
volatile HostAtomicType *destMemory,
|
||||
HostDataType *oldValues) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
|
||||
MemoryOrder());
|
||||
@@ -1312,7 +1411,23 @@ public:
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
expected = StartValue();
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
{
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
{
|
||||
expected = cl_half_from_float(
|
||||
cl_half_to_float(expected)
|
||||
+ cl_half_to_float(startRefValues[i]),
|
||||
gHalfRoundingMode);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
@@ -1331,12 +1446,22 @@ public:
|
||||
const std::vector<HostAtomicType> &testValues,
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
return std::abs((HOST_ATOMIC_FLOAT)expected
|
||||
return std::abs(cl_half_to_float(expected)
|
||||
- cl_half_to_float(testValues[whichDestValue]))
|
||||
> max_error;
|
||||
}
|
||||
else if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
return std::abs((HostDataType)expected
|
||||
- testValues[whichDestValue])
|
||||
> max_error_fp32;
|
||||
> max_error;
|
||||
}
|
||||
return CBasicTestMemOrderScope<
|
||||
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
|
||||
@@ -1346,7 +1471,10 @@ public:
|
||||
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||
HostAtomicType *finalValues) override
|
||||
{
|
||||
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
correct = true;
|
||||
for (cl_uint i = 1; i < threadCount; i++)
|
||||
@@ -1369,7 +1497,28 @@ public:
|
||||
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||
return 0; // skip test - not applicable
|
||||
|
||||
if (!LocalMemory()
|
||||
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||
return 0;
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||
return 0; // skip test - not applicable
|
||||
|
||||
if (!LocalMemory()
|
||||
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
|
||||
== 0)
|
||||
return 0;
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||
@@ -1385,7 +1534,10 @@ public:
|
||||
}
|
||||
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
return threadCount;
|
||||
}
|
||||
@@ -1420,6 +1572,16 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
|
||||
|
||||
if (gFloatAtomicsSupported)
|
||||
{
|
||||
CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
|
||||
TYPE_ATOMIC_HALF, useSVM);
|
||||
EXECUTE_TEST(error,
|
||||
test_half.Execute(deviceID, context, queue, num_elements));
|
||||
|
||||
CBasicTestFetchAdd<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
|
||||
TYPE_ATOMIC_DOUBLE, useSVM);
|
||||
EXECUTE_TEST(
|
||||
error, test_double.Execute(deviceID, context, queue, num_elements));
|
||||
|
||||
CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
|
||||
TYPE_ATOMIC_FLOAT, useSVM);
|
||||
EXECUTE_TEST(
|
||||
@@ -1506,7 +1668,10 @@ public:
|
||||
useSVM),
|
||||
min_range(-999.0), max_range(999.0), max_error(0.0)
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
StartValue(0);
|
||||
CBasicTestMemOrderScope<HostAtomicType,
|
||||
@@ -1514,6 +1679,13 @@ public:
|
||||
}
|
||||
}
|
||||
template <typename Iterator>
|
||||
HostDataType subtract(Iterator begin, Iterator end)
|
||||
{
|
||||
HostDataType res = 0;
|
||||
for (auto it = begin; it != end; ++it) res = res - *it;
|
||||
return res;
|
||||
}
|
||||
template <typename Iterator>
|
||||
float subtract_halfs(Iterator begin, Iterator end)
|
||||
{
|
||||
cl_half res = 0;
|
||||
@@ -1528,12 +1700,53 @@ public:
|
||||
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||
MTdata d) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (threadCount > ref_vals.size())
|
||||
{
|
||||
ref_vals.resize(threadCount);
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
ref_vals[i] = get_random_float(min_range, max_range, d);
|
||||
|
||||
memcpy(startRefValues, ref_vals.data(),
|
||||
sizeof(HostDataType) * ref_vals.size());
|
||||
|
||||
// Estimate highest possible subtraction error for given set.
|
||||
std::vector<HostDataType> sums;
|
||||
std::sort(ref_vals.begin(), ref_vals.end());
|
||||
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
|
||||
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
|
||||
|
||||
std::sort(
|
||||
ref_vals.begin(), ref_vals.end(),
|
||||
[](float a, float b) { return std::abs(a) < std::abs(b); });
|
||||
|
||||
double precise = 0.0;
|
||||
for (auto elem : ref_vals) precise += double(elem);
|
||||
sums.push_back(precise);
|
||||
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
|
||||
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
|
||||
|
||||
std::sort(sums.begin(), sums.end());
|
||||
max_error =
|
||||
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
|
||||
|
||||
// restore unsorted order
|
||||
memcpy(ref_vals.data(), startRefValues,
|
||||
sizeof(HostDataType) * ref_vals.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(startRefValues, ref_vals.data(),
|
||||
sizeof(HostDataType) * threadCount);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (threadCount > ref_vals.size())
|
||||
{
|
||||
ref_vals.resize(threadCount);
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
ref_vals[i] = cl_half_from_float(
|
||||
get_random_float(min_range, max_range, d),
|
||||
@@ -1563,7 +1776,6 @@ public:
|
||||
float precise = 0.f;
|
||||
for (auto elem : ref_vals) precise -= cl_half_to_float(elem);
|
||||
sums.push_back(precise);
|
||||
|
||||
sums.push_back(
|
||||
subtract_halfs(ref_vals.begin(), ref_vals.end()));
|
||||
sums.push_back(
|
||||
@@ -1571,7 +1783,6 @@ public:
|
||||
|
||||
std::sort(sums.begin(), sums.end());
|
||||
max_error = std::abs(sums.front() - sums.back());
|
||||
|
||||
// restore unsorted order
|
||||
memcpy(ref_vals.data(), startRefValues,
|
||||
sizeof(HostDataType) * ref_vals.size());
|
||||
@@ -1590,7 +1801,10 @@ public:
|
||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
|
||||
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
|
||||
@@ -1612,7 +1826,10 @@ public:
|
||||
volatile HostAtomicType *destMemory,
|
||||
HostDataType *oldValues) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
|
||||
MemoryOrder());
|
||||
@@ -1634,7 +1851,13 @@ public:
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
expected = StartValue();
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
expected -= startRefValues[i];
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
{
|
||||
@@ -1659,7 +1882,14 @@ public:
|
||||
const std::vector<HostAtomicType> &testValues,
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
return std::abs((HOST_ATOMIC_FLOAT)expected
|
||||
- testValues[whichDestValue])
|
||||
> max_error;
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
return std::abs(cl_half_to_float(expected)
|
||||
@@ -1674,7 +1904,7 @@ public:
|
||||
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||
HostAtomicType *finalValues) override
|
||||
{
|
||||
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||
if (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
correct = true;
|
||||
for (cl_uint i = 1; i < threadCount; i++)
|
||||
@@ -1697,7 +1927,17 @@ public:
|
||||
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||
return 0; // skip test - not applicable
|
||||
|
||||
if (!LocalMemory()
|
||||
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||
return 0;
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||
@@ -1713,7 +1953,10 @@ public:
|
||||
}
|
||||
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
return threadCount;
|
||||
}
|
||||
@@ -1748,6 +1991,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
|
||||
|
||||
if (gFloatAtomicsSupported)
|
||||
{
|
||||
CBasicTestFetchSub<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
|
||||
TYPE_ATOMIC_FLOAT, useSVM);
|
||||
EXECUTE_TEST(
|
||||
error, test_float.Execute(deviceID, context, queue, num_elements));
|
||||
|
||||
CBasicTestFetchSub<HOST_ATOMIC_HALF, HOST_HALF> test_half(
|
||||
TYPE_ATOMIC_HALF, useSVM);
|
||||
EXECUTE_TEST(error,
|
||||
@@ -2624,7 +2872,10 @@ public:
|
||||
min_range(-999.0), max_range(999.0)
|
||||
{
|
||||
StartValue(DataType().MaxValue());
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::OldValueCheck(false);
|
||||
@@ -2634,7 +2885,10 @@ public:
|
||||
{
|
||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
return " atomic_fetch_min" + postfix
|
||||
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
|
||||
@@ -2653,7 +2907,10 @@ public:
|
||||
volatile HostAtomicType *destMemory,
|
||||
HostDataType *oldValues) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
host_atomic_fetch_min(&destMemory[0], oldValues[tid],
|
||||
MemoryOrder());
|
||||
@@ -2669,7 +2926,19 @@ public:
|
||||
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||
MTdata d) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
{
|
||||
startRefValues[i] = cl_half_from_float(
|
||||
get_random_float(min_range, max_range, d),
|
||||
gHalfRoundingMode);
|
||||
}
|
||||
}
|
||||
else if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
{
|
||||
@@ -2696,7 +2965,19 @@ public:
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
expected = StartValue();
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
{
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
{
|
||||
if (cl_half_to_float(startRefValues[i])
|
||||
< cl_half_to_float(expected))
|
||||
expected = startRefValues[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
@@ -2716,7 +2997,9 @@ public:
|
||||
const std::vector<HostAtomicType> &testValues,
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||
if (std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
|
||||
@@ -2731,12 +3014,14 @@ public:
|
||||
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||
HostAtomicType *finalValues) override
|
||||
{
|
||||
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||
if (std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
|
||||
{
|
||||
correct = true;
|
||||
for (cl_uint i = 1; i < threadCount; i++)
|
||||
{
|
||||
for (cl_uint i = 1; i < threadCount; i++)
|
||||
if (refValues[i] != StartValue())
|
||||
{
|
||||
log_error("Thread %d found %d mismatch(es)\n", i,
|
||||
(cl_uint)refValues[i]);
|
||||
@@ -2754,7 +3039,31 @@ public:
|
||||
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||
== 0)
|
||||
return 0; // skip test - not applicable
|
||||
|
||||
if (!LocalMemory()
|
||||
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||
== 0)
|
||||
return 0;
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||
== 0)
|
||||
return 0; // skip test - not applicable
|
||||
|
||||
if (!LocalMemory()
|
||||
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||
== 0)
|
||||
return 0;
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||
@@ -2772,7 +3081,10 @@ public:
|
||||
}
|
||||
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
return threadCount;
|
||||
}
|
||||
@@ -2807,6 +3119,16 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID,
|
||||
|
||||
if (gFloatAtomicsSupported)
|
||||
{
|
||||
CBasicTestFetchMin<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
|
||||
TYPE_ATOMIC_DOUBLE, useSVM);
|
||||
EXECUTE_TEST(
|
||||
error, test_double.Execute(deviceID, context, queue, num_elements));
|
||||
|
||||
CBasicTestFetchMin<HOST_ATOMIC_HALF, HOST_HALF> test_half(
|
||||
TYPE_ATOMIC_HALF, useSVM);
|
||||
EXECUTE_TEST(error,
|
||||
test_half.Execute(deviceID, context, queue, num_elements));
|
||||
|
||||
CBasicTestFetchMin<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
|
||||
TYPE_ATOMIC_FLOAT, useSVM);
|
||||
EXECUTE_TEST(
|
||||
@@ -2890,18 +3212,31 @@ public:
|
||||
useSVM),
|
||||
min_range(-999.0), max_range(999.0)
|
||||
{
|
||||
StartValue(DataType().MinValue());
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::OldValueCheck(false);
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
StartValue(cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode));
|
||||
else
|
||||
StartValue(-DataType().MaxValue());
|
||||
}
|
||||
else
|
||||
{
|
||||
StartValue(DataType().MinValue());
|
||||
}
|
||||
}
|
||||
std::string ProgramCore() override
|
||||
{
|
||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
return " atomic_fetch_max" + postfix
|
||||
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
|
||||
@@ -2920,7 +3255,10 @@ public:
|
||||
volatile HostAtomicType *destMemory,
|
||||
HostDataType *oldValues) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
host_atomic_fetch_max(&destMemory[0], oldValues[tid],
|
||||
MemoryOrder());
|
||||
@@ -2936,7 +3274,19 @@ public:
|
||||
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||
MTdata d) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
{
|
||||
startRefValues[i] = cl_half_from_float(
|
||||
get_random_float(min_range, max_range, d),
|
||||
gHalfRoundingMode);
|
||||
}
|
||||
}
|
||||
else if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
{
|
||||
@@ -2963,7 +3313,19 @@ public:
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
expected = StartValue();
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
{
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
{
|
||||
if (cl_half_to_float(startRefValues[i])
|
||||
> cl_half_to_float(expected))
|
||||
expected = startRefValues[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
@@ -2983,7 +3345,9 @@ public:
|
||||
const std::vector<HostAtomicType> &testValues,
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||
if (std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
|
||||
@@ -2998,7 +3362,9 @@ public:
|
||||
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||
HostAtomicType *finalValues) override
|
||||
{
|
||||
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||
if (std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
correct = true;
|
||||
for (cl_uint i = 1; i < threadCount; i++)
|
||||
@@ -3021,7 +3387,31 @@ public:
|
||||
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||
== 0)
|
||||
return 0; // skip test - not applicable
|
||||
|
||||
if (!LocalMemory()
|
||||
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||
== 0)
|
||||
return 0;
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||
== 0)
|
||||
return 0; // skip test - not applicable
|
||||
|
||||
if (!LocalMemory()
|
||||
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||
== 0)
|
||||
return 0;
|
||||
}
|
||||
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||
{
|
||||
if (LocalMemory()
|
||||
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||
@@ -3039,7 +3429,10 @@ public:
|
||||
}
|
||||
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
HostDataType,
|
||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||
{
|
||||
return threadCount;
|
||||
}
|
||||
@@ -3074,6 +3467,16 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID,
|
||||
|
||||
if (gFloatAtomicsSupported)
|
||||
{
|
||||
CBasicTestFetchMax<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
|
||||
TYPE_ATOMIC_DOUBLE, useSVM);
|
||||
EXECUTE_TEST(
|
||||
error, test_double.Execute(deviceID, context, queue, num_elements));
|
||||
|
||||
CBasicTestFetchMax<HOST_ATOMIC_HALF, HOST_HALF> test_half(
|
||||
TYPE_ATOMIC_HALF, useSVM);
|
||||
EXECUTE_TEST(error,
|
||||
test_half.Execute(deviceID, context, queue, num_elements));
|
||||
|
||||
CBasicTestFetchMax<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
|
||||
TYPE_ATOMIC_FLOAT, useSVM);
|
||||
EXECUTE_TEST(
|
||||
@@ -3898,6 +4301,9 @@ private:
|
||||
struct TestDefinition _subCase;
|
||||
};
|
||||
|
||||
#if 0
|
||||
// The tests below are likely incorrect and have been disabled.
|
||||
// See https://github.com/KhronosGroup/OpenCL-CTS/issues/2544
|
||||
static int test_atomic_fence_generic(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, int num_elements,
|
||||
bool useSVM)
|
||||
@@ -3977,3 +4383,4 @@ REGISTER_TEST(svm_atomic_fence)
|
||||
return test_atomic_fence_generic(device, context, queue, num_elements,
|
||||
true);
|
||||
}
|
||||
#endif
|
||||
|
||||
13
test_conformance/common/directx_wrapper/CMakeLists.txt
Normal file
13
test_conformance/common/directx_wrapper/CMakeLists.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
set(DIRECTX_WRAPPER_SOURCES
|
||||
directx_wrapper.cpp
|
||||
)
|
||||
|
||||
add_library(directx_wrapper STATIC ${DIRECTX_WRAPPER_SOURCES})
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
include_directories(${CLConform_INCLUDE_DIR})
|
||||
|
||||
if (WIN32)
|
||||
target_link_libraries(directx_wrapper d3d12)
|
||||
endif ()
|
||||
71
test_conformance/common/directx_wrapper/directx_wrapper.cpp
Normal file
71
test_conformance/common/directx_wrapper/directx_wrapper.cpp
Normal file
@@ -0,0 +1,71 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "directx_wrapper.hpp"
|
||||
|
||||
DirectXWrapper::DirectXWrapper()
|
||||
{
|
||||
|
||||
HRESULT hr = D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL_12_0,
|
||||
IID_PPV_ARGS(&dx_device));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
throw std::runtime_error("Failed to create DirectX 12 device");
|
||||
}
|
||||
|
||||
D3D12_COMMAND_QUEUE_DESC desc{};
|
||||
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
|
||||
desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
|
||||
hr = dx_device->CreateCommandQueue(&desc, IID_PPV_ARGS(&dx_command_queue));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
throw std::runtime_error("Failed to create DirectX 12 command queue");
|
||||
}
|
||||
|
||||
hr = dx_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT,
|
||||
IID_PPV_ARGS(&dx_command_allocator));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"Failed to create DirectX 12 command allocator");
|
||||
}
|
||||
}
|
||||
|
||||
ID3D12Device* DirectXWrapper::getDXDevice() const { return dx_device.Get(); }
|
||||
|
||||
ID3D12CommandQueue* DirectXWrapper::getDXCommandQueue() const
|
||||
{
|
||||
return dx_command_queue.Get();
|
||||
}
|
||||
ID3D12CommandAllocator* DirectXWrapper::getDXCommandAllocator() const
|
||||
{
|
||||
return dx_command_allocator.Get();
|
||||
}
|
||||
|
||||
DirectXFenceWrapper::DirectXFenceWrapper(ID3D12Device* dx_device)
|
||||
: dx_device(dx_device)
|
||||
{
|
||||
if (!dx_device)
|
||||
{
|
||||
throw std::runtime_error("ID3D12Device is not valid");
|
||||
}
|
||||
const HRESULT hr = dx_device->CreateFence(0, D3D12_FENCE_FLAG_SHARED,
|
||||
IID_PPV_ARGS(&dx_fence));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
throw std::runtime_error("Failed to create the DirectX fence");
|
||||
}
|
||||
}
|
||||
47
test_conformance/common/directx_wrapper/directx_wrapper.hpp
Normal file
47
test_conformance/common/directx_wrapper/directx_wrapper.hpp
Normal file
@@ -0,0 +1,47 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <d3d12.h>
|
||||
#include <wrl/client.h>
|
||||
#include <stdexcept>
|
||||
|
||||
using namespace Microsoft::WRL;
|
||||
|
||||
class DirectXWrapper {
|
||||
public:
|
||||
DirectXWrapper();
|
||||
|
||||
ID3D12Device* getDXDevice() const;
|
||||
ID3D12CommandQueue* getDXCommandQueue() const;
|
||||
ID3D12CommandAllocator* getDXCommandAllocator() const;
|
||||
|
||||
protected:
|
||||
ComPtr<ID3D12Device> dx_device = nullptr;
|
||||
ComPtr<ID3D12CommandQueue> dx_command_queue = nullptr;
|
||||
ComPtr<ID3D12CommandAllocator> dx_command_allocator = nullptr;
|
||||
};
|
||||
|
||||
class DirectXFenceWrapper {
|
||||
public:
|
||||
DirectXFenceWrapper(ID3D12Device* dx_device);
|
||||
ID3D12Fence* operator*() const { return dx_fence.Get(); }
|
||||
|
||||
private:
|
||||
ComPtr<ID3D12Fence> dx_fence = nullptr;
|
||||
ComPtr<ID3D12Device> dx_device = nullptr;
|
||||
};
|
||||
@@ -57,7 +57,7 @@ namespace {
|
||||
template <typename T>
|
||||
int verify_degrees(const T *const inptr, const T *const outptr, int n)
|
||||
{
|
||||
float error, max_error = 0.0f;
|
||||
float error, max_error = -INFINITY;
|
||||
double r, max_val = NAN;
|
||||
int max_index = 0;
|
||||
|
||||
@@ -89,13 +89,16 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n)
|
||||
}
|
||||
|
||||
if (std::is_same<T, half>::value)
|
||||
log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
|
||||
max_error, max_index, max_val, conv_to_flt(outptr[max_index]),
|
||||
max_val, conv_to_flt(outptr[max_index]));
|
||||
log_info("degrees: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
|
||||
"vs %g)\n",
|
||||
max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
|
||||
conv_to_flt(outptr[max_index]), max_val,
|
||||
conv_to_flt(outptr[max_index]));
|
||||
else
|
||||
log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
|
||||
max_error, max_index, max_val, outptr[max_index], max_val,
|
||||
outptr[max_index]);
|
||||
log_info("degrees: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
|
||||
"vs %g)\n",
|
||||
max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
|
||||
outptr[max_index], max_val, outptr[max_index]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -103,7 +106,7 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n)
|
||||
template <typename T>
|
||||
int verify_radians(const T *const inptr, const T *const outptr, int n)
|
||||
{
|
||||
float error, max_error = 0.0f;
|
||||
float error, max_error = -INFINITY;
|
||||
double r, max_val = NAN;
|
||||
int max_index = 0;
|
||||
|
||||
@@ -135,13 +138,16 @@ int verify_radians(const T *const inptr, const T *const outptr, int n)
|
||||
}
|
||||
|
||||
if (std::is_same<T, half>::value)
|
||||
log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
|
||||
max_error, max_index, max_val, conv_to_flt(outptr[max_index]),
|
||||
max_val, conv_to_flt(outptr[max_index]));
|
||||
log_info("radians: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
|
||||
"vs %g)\n",
|
||||
max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
|
||||
conv_to_flt(outptr[max_index]), max_val,
|
||||
conv_to_flt(outptr[max_index]));
|
||||
else
|
||||
log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
|
||||
max_error, max_index, max_val, outptr[max_index], max_val,
|
||||
outptr[max_index]);
|
||||
log_info("radians: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
|
||||
"vs %g)\n",
|
||||
max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
|
||||
outptr[max_index], max_val, outptr[max_index]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
set(MODULE_NAME COMPILER)
|
||||
|
||||
find_package(Python3 COMPONENTS Interpreter QUIET)
|
||||
|
||||
set(${MODULE_NAME}_SOURCES
|
||||
main.cpp
|
||||
test_build_helpers.cpp
|
||||
@@ -52,7 +54,7 @@ add_custom_command(
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
||||
${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory
|
||||
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory
|
||||
COMMAND ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose
|
||||
COMMAND ${Python3_EXECUTABLE} ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose
|
||||
DEPENDS ${COMPILER_ASSEMBLY_SCRIPT} ${COMPILER_ASM}
|
||||
VERBATIM)
|
||||
|
||||
|
||||
@@ -13,10 +13,62 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/stringHelpers.h"
|
||||
|
||||
std::string spvBinariesPath = "spirv_bin";
|
||||
const std::string spvBinariesPathArg = "--spirv-binaries-path";
|
||||
|
||||
void printUsage()
|
||||
{
|
||||
log_info("Reading SPIR-V files from default '%s' path.\n",
|
||||
spvBinariesPath.c_str());
|
||||
log_info("In case you want to set other directory use '%s' argument.\n",
|
||||
spvBinariesPathArg.c_str());
|
||||
}
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
bool modifiedSpvBinariesPath = false;
|
||||
bool listTests = false;
|
||||
for (int i = 0; i < argc; ++i)
|
||||
{
|
||||
int argsRemoveNum = 0;
|
||||
if (argv[i] == spvBinariesPathArg)
|
||||
{
|
||||
if (i + 1 == argc)
|
||||
{
|
||||
log_error("Missing value for '%s' argument.\n",
|
||||
spvBinariesPathArg.c_str());
|
||||
return TEST_FAIL;
|
||||
}
|
||||
else
|
||||
{
|
||||
spvBinariesPath = std::string(argv[i + 1]);
|
||||
argsRemoveNum += 2;
|
||||
modifiedSpvBinariesPath = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (argsRemoveNum > 0)
|
||||
{
|
||||
for (int j = i; j < (argc - argsRemoveNum); ++j)
|
||||
argv[j] = argv[j + argsRemoveNum];
|
||||
|
||||
argc -= argsRemoveNum;
|
||||
--i;
|
||||
}
|
||||
listTests |= (argv[i] == std::string("--list")
|
||||
|| argv[i] == std::string("-list"));
|
||||
}
|
||||
if (modifiedSpvBinariesPath == false && !listTests)
|
||||
{
|
||||
printUsage();
|
||||
}
|
||||
|
||||
return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
|
||||
test_registry::getInstance().definitions(), false, 0);
|
||||
}
|
||||
|
||||
@@ -24,6 +24,8 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include "harness/conversions.h"
|
||||
#include "harness/stringHelpers.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#define MAX_LINE_SIZE_IN_PROGRAM 1024
|
||||
#define MAX_LOG_SIZE_IN_PROGRAM 2048
|
||||
@@ -158,6 +160,14 @@ const char *link_static_function_access = // use with compile_static_function
|
||||
"extern int foo(int, int);\n"
|
||||
"int access_foo() { int blah = foo(3, 4); return blah + 5; }\n";
|
||||
|
||||
const char *multi_build_test_kernel = R"(
|
||||
__kernel void test_kernel(__global int *dst)
|
||||
{
|
||||
int tid = get_global_id(0);
|
||||
dst[tid] = BUILD_OPT_VAL;
|
||||
}
|
||||
)";
|
||||
|
||||
static int test_large_single_compile(cl_context context, cl_device_id deviceID,
|
||||
unsigned int numLines)
|
||||
{
|
||||
@@ -3059,12 +3069,13 @@ REGISTER_TEST(execute_after_included_header_link)
|
||||
}
|
||||
|
||||
const auto simple_header_path = temp_dir_path / simple_header_name;
|
||||
FILE *simple_header_file =
|
||||
fopen(simple_header_path.u8string().c_str(), "w");
|
||||
const std::string simple_header_path_str =
|
||||
to_string(simple_header_path.u8string());
|
||||
FILE *simple_header_file = fopen(simple_header_path_str.c_str(), "w");
|
||||
if (simple_header_file == NULL)
|
||||
{
|
||||
log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n",
|
||||
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
|
||||
simple_header_path_str.c_str(), __FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
if (fprintf(simple_header_file, "%s", simple_header) < 0)
|
||||
@@ -3082,7 +3093,7 @@ REGISTER_TEST(execute_after_included_header_link)
|
||||
}
|
||||
|
||||
const std::string include_path =
|
||||
std::string("-I") + temp_dir_path.generic_u8string();
|
||||
std::string("-I") + to_string(temp_dir_path.generic_u8string());
|
||||
error = clCompileProgram(program, 1, &device, include_path.c_str(), 0, NULL,
|
||||
NULL, NULL, NULL);
|
||||
test_error(error,
|
||||
@@ -3931,3 +3942,100 @@ REGISTER_TEST(compile_and_link_status_options_log)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
REGISTER_TEST(multiple_build_program)
|
||||
{
|
||||
if (gCompilationMode != kOnline)
|
||||
{
|
||||
log_info(
|
||||
"Skipping multiple_build_program, compilation mode not online\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
cl_int error = CL_SUCCESS;
|
||||
const size_t num_threads = num_elements;
|
||||
|
||||
clProgramWrapper program = clCreateProgramWithSource(
|
||||
context, 1, &multi_build_test_kernel, nullptr, &error);
|
||||
test_error(error, "clCreateProgramWithSource failed");
|
||||
|
||||
clMemWrapper out_stream_0 = clCreateBuffer(
|
||||
context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_threads, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
clMemWrapper out_stream_1 = clCreateBuffer(
|
||||
context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_threads, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
{
|
||||
/* Build with the macro defined */
|
||||
error = clBuildProgram(program, 1, &device, "-DBUILD_OPT_VAL=1 ", NULL,
|
||||
NULL);
|
||||
test_error(error, "clBuildProgram failed");
|
||||
|
||||
clKernelWrapper kernel0 =
|
||||
clCreateKernel(program, "test_kernel", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
error = clSetKernelArg(kernel0, 0, sizeof(out_stream_0), &out_stream_0);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clEnqueueNDRangeKernel(queue, kernel0, 1, NULL, &num_threads,
|
||||
NULL, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
{
|
||||
/* Rebuild with the macro redefined */
|
||||
error = clBuildProgram(program, 1, &device, "-DBUILD_OPT_VAL=2 ", NULL,
|
||||
NULL);
|
||||
test_error(error, "clBuildProgram failed");
|
||||
|
||||
clKernelWrapper kernel1 =
|
||||
clCreateKernel(program, "test_kernel", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
error = clSetKernelArg(kernel1, 0, sizeof(out_stream_1), &out_stream_1);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &num_threads,
|
||||
NULL, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
std::vector<cl_int> test_values(num_threads, 0);
|
||||
error = clEnqueueReadBuffer(queue, out_stream_0, true, 0,
|
||||
sizeof(cl_int) * num_threads,
|
||||
test_values.data(), 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (size_t i = 0; i < test_values.size(); i++)
|
||||
{
|
||||
if (test_values[i] != 1)
|
||||
{
|
||||
log_error("Unexpected test value %d for kernel0 at pos %zu.\n",
|
||||
test_values[i], i);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
error = clEnqueueReadBuffer(queue, out_stream_1, true, 0,
|
||||
sizeof(cl_int) * num_threads,
|
||||
test_values.data(), 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (size_t i = 0; i < test_values.size(); i++)
|
||||
{
|
||||
if (test_values[i] != 2)
|
||||
{
|
||||
log_error("Unexpected test value %d for kernel1 at pos %zu.\n",
|
||||
test_values[i], i);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
@@ -94,6 +94,7 @@ const char *known_extensions[] = {
|
||||
"cl_khr_external_memory_dma_buf",
|
||||
"cl_khr_command_buffer",
|
||||
"cl_khr_command_buffer_mutable_dispatch",
|
||||
"cl_khr_command_buffer_mutable_memory_commands",
|
||||
"cl_khr_command_buffer_multi_device",
|
||||
"cl_khr_external_memory_android_hardware_buffer",
|
||||
"cl_khr_unified_svm",
|
||||
|
||||
@@ -32,8 +32,6 @@ const std::string slash = "\\";
|
||||
#else
|
||||
const std::string slash = "/";
|
||||
#endif
|
||||
std::string compilerSpvBinaries = "test_conformance" + slash + "compiler"
|
||||
+ slash + "spirv_bin" + slash + "write_kernel.spv";
|
||||
|
||||
const std::string spvExt = ".spv";
|
||||
|
||||
@@ -338,8 +336,8 @@ public:
|
||||
|
||||
std::vector<unsigned char> kernel_buffer;
|
||||
|
||||
std::string file_name =
|
||||
compilerSpvBinaries + std::to_string(address_bits);
|
||||
std::string file_name = spvBinariesPath + slash + "write_kernel.spv"
|
||||
+ std::to_string(address_bits);
|
||||
m_spirv_binary = readBinary(file_name.c_str());
|
||||
m_spirv_size = m_spirv_binary.size();
|
||||
}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
#include <array>
|
||||
#include <string>
|
||||
|
||||
extern std::string spvBinariesPath;
|
||||
|
||||
static const char write_kernel_source[] = R"(
|
||||
kernel void write_kernel(global unsigned int *p) {
|
||||
|
||||
@@ -15,3 +15,6 @@ add_subdirectory( cl_ext_buffer_device_address )
|
||||
if(VULKAN_IS_SUPPORTED)
|
||||
add_subdirectory( cl_khr_external_semaphore )
|
||||
endif()
|
||||
if(D3D12_IS_SUPPORTED)
|
||||
add_subdirectory( cl_khr_external_semaphore_dx_fence )
|
||||
endif()
|
||||
@@ -17,6 +17,7 @@ set(${MODULE_NAME}_SOURCES
|
||||
command_buffer_test_barrier.cpp
|
||||
command_buffer_test_event_info.cpp
|
||||
command_buffer_finalize.cpp
|
||||
command_buffer_pipelined_enqueue.cpp
|
||||
negative_command_buffer_finalize.cpp
|
||||
negative_command_buffer_svm_mem.cpp
|
||||
negative_command_buffer_copy_image.cpp
|
||||
|
||||
@@ -27,9 +27,6 @@ BasicCommandBufferTest::BasicCommandBufferTest(cl_device_id device,
|
||||
: CommandBufferTestBase(device), context(context), queue(nullptr),
|
||||
num_elements(0), simultaneous_use_support(false),
|
||||
out_of_order_support(false), queue_out_of_order_support(false),
|
||||
// try to use simultaneous path by default
|
||||
simultaneous_use_requested(true),
|
||||
// due to simultaneous cases extend buffer size
|
||||
buffer_size_multiplier(1), command_buffer(this)
|
||||
{
|
||||
cl_int error = clRetainCommandQueue(queue);
|
||||
@@ -72,9 +69,8 @@ bool BasicCommandBufferTest::Skip()
|
||||
sizeof(capabilities), &capabilities, NULL);
|
||||
test_error(error,
|
||||
"Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
|
||||
simultaneous_use_support = simultaneous_use_requested
|
||||
&& (capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR)
|
||||
!= 0;
|
||||
simultaneous_use_support =
|
||||
(capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR) != 0;
|
||||
out_of_order_support =
|
||||
supported_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
|
||||
device_side_enqueue_support =
|
||||
@@ -167,19 +163,7 @@ cl_int BasicCommandBufferTest::SetUp(int elements)
|
||||
error = SetUpKernelArgs();
|
||||
test_error(error, "SetUpKernelArgs failed");
|
||||
|
||||
if (simultaneous_use_support)
|
||||
{
|
||||
cl_command_buffer_properties_khr properties[3] = {
|
||||
CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR,
|
||||
0
|
||||
};
|
||||
command_buffer =
|
||||
clCreateCommandBufferKHR(1, &queue, properties, &error);
|
||||
}
|
||||
else
|
||||
{
|
||||
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
|
||||
}
|
||||
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
|
||||
test_error(error, "clCreateCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
@@ -192,11 +176,6 @@ cl_int MultiFlagCreationTest::Run()
|
||||
|
||||
// First try to find multiple flags that are supported by the driver and
|
||||
// device.
|
||||
if (simultaneous_use_support)
|
||||
{
|
||||
flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
|
||||
}
|
||||
|
||||
if (is_extension_available(
|
||||
device, CL_KHR_COMMAND_BUFFER_MULTI_DEVICE_EXTENSION_NAME))
|
||||
{
|
||||
@@ -207,6 +186,11 @@ cl_int MultiFlagCreationTest::Run()
|
||||
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
|
||||
{
|
||||
flags |= CL_COMMAND_BUFFER_MUTABLE_KHR;
|
||||
|
||||
if (simultaneous_use_support)
|
||||
{
|
||||
flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
|
||||
}
|
||||
}
|
||||
|
||||
cl_command_buffer_properties_khr props[] = { CL_COMMAND_BUFFER_FLAGS_KHR,
|
||||
@@ -381,11 +365,6 @@ cl_int ExplicitFlushTest::Run()
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
bool ExplicitFlushTest::Skip()
|
||||
{
|
||||
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
|
||||
}
|
||||
|
||||
cl_int InterleavedEnqueueTest::Run()
|
||||
{
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
@@ -431,11 +410,6 @@ cl_int InterleavedEnqueueTest::Run()
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
bool InterleavedEnqueueTest::Skip()
|
||||
{
|
||||
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
|
||||
}
|
||||
|
||||
cl_int EnqueueAndReleaseTest::Run()
|
||||
{
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
|
||||
@@ -78,8 +78,11 @@ protected:
|
||||
bool queue_out_of_order_support;
|
||||
bool device_side_enqueue_support;
|
||||
|
||||
// user request for simultaneous use
|
||||
bool simultaneous_use_requested;
|
||||
// Extends size of created 'in_mem' & 'out_mem' buffers, such that the same
|
||||
// cl_mem buffer can be used across multiple enqueues of a command-buffer.
|
||||
// Accessed in the kernel at an offset for each enqueue which is passed as
|
||||
// a kernel parameter through the 'off_mem' buffer.
|
||||
// See BasicCommandBufferTest::SetUpKernel() definition.
|
||||
unsigned buffer_size_multiplier;
|
||||
clCommandBufferWrapper command_buffer;
|
||||
};
|
||||
@@ -116,7 +119,6 @@ struct ExplicitFlushTest : public BasicCommandBufferTest
|
||||
using BasicCommandBufferTest::BasicCommandBufferTest;
|
||||
|
||||
cl_int Run() override;
|
||||
bool Skip() override;
|
||||
};
|
||||
|
||||
// Test enqueueing a command-buffer twice separated by another enqueue operation
|
||||
@@ -125,7 +127,6 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest
|
||||
using BasicCommandBufferTest::BasicCommandBufferTest;
|
||||
|
||||
cl_int Run() override;
|
||||
bool Skip() override;
|
||||
};
|
||||
|
||||
// Test releasing a command-buffer after it has been submitted for execution,
|
||||
@@ -156,9 +157,9 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
|
||||
cl_version extension_version =
|
||||
get_extension_version(device, "cl_khr_command_buffer");
|
||||
|
||||
if (extension_version != CL_MAKE_VERSION(0, 9, 7))
|
||||
if (extension_version != CL_MAKE_VERSION(0, 9, 8))
|
||||
{
|
||||
log_info("cl_khr_command_buffer version 0.9.7 is required to run "
|
||||
log_info("cl_khr_command_buffer version 0.9.8 is required to run "
|
||||
"the test, skipping.\n ");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
@@ -50,13 +50,14 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
|
||||
|
||||
virtual cl_int SetUp(int elements) override
|
||||
{
|
||||
BasicCommandBufferTest::SetUp(elements);
|
||||
cl_int error = BasicCommandBufferTest::SetUp(elements);
|
||||
test_error(error, "BasicCommandBufferTest::SetUp failed");
|
||||
|
||||
cl_int error = init_extension_functions();
|
||||
error = init_extension_functions();
|
||||
test_error(error, "Unable to initialise extension functions");
|
||||
|
||||
cl_command_buffer_properties_khr prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
|
||||
if (simultaneous_use_support)
|
||||
if (simultaneous_use_requested)
|
||||
{
|
||||
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
|
||||
}
|
||||
@@ -90,10 +91,10 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
|
||||
cl_version extension_version = get_extension_version(
|
||||
device, "cl_khr_command_buffer_mutable_dispatch");
|
||||
|
||||
if (extension_version != CL_MAKE_VERSION(0, 9, 3))
|
||||
if (extension_version != CL_MAKE_VERSION(0, 9, 4))
|
||||
{
|
||||
log_info("cl_khr_command_buffer_mutable_dispatch version "
|
||||
"0.9.3 is "
|
||||
"0.9.4 is "
|
||||
"required to run the test, skipping.\n ");
|
||||
extension_avaliable = false;
|
||||
}
|
||||
@@ -128,6 +129,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
|
||||
}
|
||||
|
||||
clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
|
||||
bool simultaneous_use_requested = false;
|
||||
|
||||
const char* kernelString = "__kernel void empty() {}";
|
||||
const size_t global_work_size = 4 * 16;
|
||||
|
||||
@@ -70,9 +70,9 @@ struct MutableDispatchImage1DArguments : public BasicMutableCommandBufferTest
|
||||
{
|
||||
int offset = get_global_id(0);
|
||||
|
||||
int4 color = read_imagei( source, sampler, offset );
|
||||
uint4 color = read_imageui( source, sampler, offset );
|
||||
|
||||
write_imagei( dest, offset, color );
|
||||
write_imageui( dest, offset, color );
|
||||
})";
|
||||
|
||||
cl_int error;
|
||||
@@ -260,9 +260,9 @@ struct MutableDispatchImage2DArguments : public BasicMutableCommandBufferTest
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
int4 color = read_imagei( source, sampler, (int2) (x, y) );
|
||||
uint4 color = read_imageui( source, sampler, (int2) (x, y) );
|
||||
|
||||
write_imagei( dest, (int2) (x, y), color );
|
||||
write_imageui( dest, (int2) (x, y), color );
|
||||
})";
|
||||
|
||||
cl_int error;
|
||||
|
||||
@@ -35,9 +35,7 @@ struct IterativeArgUpdateDispatch : BasicMutableCommandBufferTest
|
||||
cl_command_queue queue)
|
||||
: BasicMutableCommandBufferTest(device, context, queue),
|
||||
command(nullptr)
|
||||
{
|
||||
simultaneous_use_requested = false;
|
||||
}
|
||||
{}
|
||||
|
||||
bool Skip() override
|
||||
{
|
||||
|
||||
@@ -33,9 +33,7 @@ struct MultipleCommandsDispatch : BasicMutableCommandBufferTest
|
||||
cl_command_queue queue)
|
||||
: BasicMutableCommandBufferTest(device, context, queue),
|
||||
command_pri(nullptr), command_sec(nullptr)
|
||||
{
|
||||
simultaneous_use_requested = false;
|
||||
}
|
||||
{}
|
||||
|
||||
bool Skip() override
|
||||
{
|
||||
@@ -47,7 +45,7 @@ struct MultipleCommandsDispatch : BasicMutableCommandBufferTest
|
||||
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
|
||||
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
|
||||
|
||||
// require mutable arguments capabillity
|
||||
// require mutable arguments capability
|
||||
return !mutable_support;
|
||||
}
|
||||
|
||||
|
||||
@@ -34,9 +34,7 @@ struct OverwriteUpdateDispatch : BasicMutableCommandBufferTest
|
||||
cl_command_queue queue)
|
||||
: BasicMutableCommandBufferTest(device, context, queue),
|
||||
command(nullptr)
|
||||
{
|
||||
simultaneous_use_requested = false;
|
||||
}
|
||||
{}
|
||||
|
||||
bool Skip() override
|
||||
{
|
||||
|
||||
@@ -21,10 +21,12 @@
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_ext.h>
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// mutable dispatch tests which handle following cases:
|
||||
// - out-of-order queue use
|
||||
// mutable dispatch tests which handles
|
||||
// - out-of-order queue with dependencies between command-buffer enqueues
|
||||
// - out-of-order queue with simultaneous use
|
||||
// - in-order queue with dependencies between command-buffer enqueues
|
||||
// - in-order queue with simultaneous use
|
||||
// - cross queue with dependencies between command-buffer enqueues
|
||||
// - cross-queue with simultaneous use
|
||||
|
||||
namespace {
|
||||
@@ -35,11 +37,10 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
SimultaneousMutableDispatchTest(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: BasicMutableCommandBufferTest(device, context, queue),
|
||||
work_queue(nullptr), work_command_buffer(this), user_event(nullptr),
|
||||
wait_pass_event(nullptr), command(nullptr)
|
||||
work_queue(nullptr), work_command_buffer(this), new_in_mem(nullptr),
|
||||
command(nullptr)
|
||||
{
|
||||
simultaneous_use_requested = simultaneous_request;
|
||||
if (simultaneous_request) buffer_size_multiplier = 2;
|
||||
}
|
||||
|
||||
cl_int SetUpKernel() override
|
||||
@@ -48,26 +49,36 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
|
||||
|
||||
// create additional kernel to properly prepare output buffer for test
|
||||
const char* kernel_str =
|
||||
const char *kernel_str =
|
||||
R"(
|
||||
__kernel void fill(int pattern, __global int* out, __global int*
|
||||
offset)
|
||||
__kernel void mul(__global int* out, __global int* in, int mul_val)
|
||||
{
|
||||
size_t id = get_global_id(0);
|
||||
size_t ind = offset[0] + id ;
|
||||
out[ind] = pattern;
|
||||
out[id] = in[id] * mul_val;
|
||||
})";
|
||||
|
||||
error = create_single_kernel_helper_create_program(
|
||||
context, &program_fill, 1, &kernel_str);
|
||||
context, &program_mul, 1, &kernel_str);
|
||||
test_error(error, "Failed to create program with source");
|
||||
|
||||
error =
|
||||
clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr);
|
||||
clBuildProgram(program_mul, 1, &device, nullptr, nullptr, nullptr);
|
||||
test_error(error, "Failed to build program");
|
||||
|
||||
kernel_fill = clCreateKernel(program_fill, "fill", &error);
|
||||
test_error(error, "Failed to create copy kernel");
|
||||
kernel_mul = clCreateKernel(program_mul, "mul", &error);
|
||||
test_error(error, "Failed to create multiply kernel");
|
||||
|
||||
new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
sizeof(cl_int) * num_elements
|
||||
* buffer_size_multiplier,
|
||||
nullptr, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
new_in_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||
sizeof(cl_int) * num_elements
|
||||
* buffer_size_multiplier,
|
||||
nullptr, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
@@ -77,14 +88,13 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
|
||||
test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed");
|
||||
|
||||
error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int),
|
||||
&overwritten_pattern);
|
||||
error = clSetKernelArg(kernel_mul, 0, sizeof(out_mem), &out_mem);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem);
|
||||
error = clSetKernelArg(kernel_mul, 1, sizeof(off_mem), &in_mem);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem);
|
||||
error = clSetKernelArg(kernel_mul, 2, sizeof(cl_int), &pattern_pri);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
@@ -101,30 +111,28 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
|
||||
&error);
|
||||
test_error(error, "Unable to create command queue to test with");
|
||||
|
||||
cl_command_buffer_properties_khr prop =
|
||||
CL_COMMAND_BUFFER_MUTABLE_KHR;
|
||||
if (simultaneous_use_support)
|
||||
{
|
||||
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
|
||||
}
|
||||
|
||||
const cl_command_buffer_properties_khr props[] = {
|
||||
CL_COMMAND_BUFFER_FLAGS_KHR,
|
||||
prop,
|
||||
0,
|
||||
};
|
||||
|
||||
work_command_buffer =
|
||||
clCreateCommandBufferKHR(1, &work_queue, props, &error);
|
||||
test_error(error, "clCreateCommandBufferKHR failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
work_queue = queue;
|
||||
work_command_buffer = command_buffer;
|
||||
}
|
||||
|
||||
cl_command_buffer_properties_khr prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
|
||||
|
||||
if (simultaneous_use_requested)
|
||||
{
|
||||
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
|
||||
}
|
||||
|
||||
const cl_command_buffer_properties_khr props[] = {
|
||||
CL_COMMAND_BUFFER_FLAGS_KHR,
|
||||
prop,
|
||||
0,
|
||||
};
|
||||
|
||||
work_command_buffer =
|
||||
clCreateCommandBufferKHR(1, &work_queue, props, &error);
|
||||
test_error(error, "clCreateCommandBufferKHR failed");
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -145,293 +153,245 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
|| !mutable_support;
|
||||
}
|
||||
|
||||
cl_int RecordCommandBuffer()
|
||||
{
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
work_command_buffer, nullptr, nullptr, kernel_mul, 1, nullptr,
|
||||
&num_elements, nullptr, 0, nullptr, nullptr, &command);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(work_command_buffer);
|
||||
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int RunSerializedPass(std::vector<cl_int> &first_enqueue_output,
|
||||
std::vector<cl_int> &second_enqueue_output)
|
||||
{
|
||||
/* Serialize command-buffer enqueue, is a linear sequence of
|
||||
* commands, with dependencies enforced using an in-order queue
|
||||
* or cl_event dependencies.
|
||||
*
|
||||
* 1. Fill input buffer
|
||||
* 2. Enqueue command-buffer doing: `output = a * input;
|
||||
* 3. Read output buffer to host data so it can be verified later
|
||||
* - Update command to new input buffer, new `a` val and use output
|
||||
* buffer from previous invocation as new input buffer.
|
||||
* 4. Enqueue command-buffer again.
|
||||
* 5. Read new output buffer back to host data so it can be verified
|
||||
* later
|
||||
*
|
||||
*/
|
||||
clEventWrapper E[4];
|
||||
cl_int error = clEnqueueFillBuffer(
|
||||
work_queue, in_mem, &pattern_fill, sizeof(cl_int), 0, data_size(),
|
||||
0, nullptr, (out_of_order_request ? &E[0] : nullptr));
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
|
||||
(out_of_order_request ? &E[0] : nullptr),
|
||||
(out_of_order_request ? &E[1] : nullptr));
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(work_queue, out_mem, CL_FALSE, 0,
|
||||
data_size(), first_enqueue_output.data(),
|
||||
(out_of_order_request ? 1 : 0),
|
||||
(out_of_order_request ? &E[1] : nullptr),
|
||||
(out_of_order_request ? &E[2] : nullptr));
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
cl_mutable_dispatch_arg_khr arg_1{ 0, sizeof(new_out_mem),
|
||||
&new_out_mem };
|
||||
|
||||
cl_mutable_dispatch_arg_khr arg_2{ 1, sizeof(cl_mem), &out_mem };
|
||||
cl_mutable_dispatch_arg_khr arg_3{ 2, sizeof(cl_int), &pattern_sec };
|
||||
|
||||
cl_mutable_dispatch_arg_khr args[] = { arg_1, arg_2, arg_3 };
|
||||
cl_mutable_dispatch_config_khr dispatch_config{
|
||||
command,
|
||||
3 /* num_args */,
|
||||
0 /* num_svm_arg */,
|
||||
0 /* num_exec_infos */,
|
||||
0 /* work_dim - 0 means no change to dimensions */,
|
||||
args /* arg_list */,
|
||||
nullptr /* arg_svm_list - nullptr means no change*/,
|
||||
nullptr /* exec_info_list */,
|
||||
nullptr /* global_work_offset */,
|
||||
nullptr /* global_work_size */,
|
||||
nullptr /* local_work_size */
|
||||
};
|
||||
|
||||
cl_uint num_configs = 1;
|
||||
cl_command_buffer_update_type_khr config_types[1] = {
|
||||
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
|
||||
};
|
||||
const void* configs[1] = { &dispatch_config };
|
||||
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
|
||||
config_types, configs);
|
||||
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
|
||||
(out_of_order_request ? &E[2] : nullptr),
|
||||
(out_of_order_request ? &E[3] : nullptr));
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(
|
||||
work_queue, new_out_mem, CL_FALSE, 0, data_size(),
|
||||
second_enqueue_output.data(), (out_of_order_request ? 1 : 0),
|
||||
(out_of_order_request ? &E[3] : nullptr), nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int RunSimultaneousPass(std::vector<cl_int> &first_enqueue_output,
|
||||
std::vector<cl_int> &second_enqueue_output)
|
||||
{
|
||||
/* Simultaneous command-buffer pass enqueues a command-buffer twice
|
||||
* without dependencies between the enqueues, but an update so that
|
||||
* all the parameters are different to avoid race conditions in the
|
||||
* kernel execution. The asynchronous task graph looks like:
|
||||
*
|
||||
* (Fill input A buffer) (Fill input B buffer)
|
||||
* | |
|
||||
* (Enqueue command_buffer) (Enqueue updated command_buffer)
|
||||
* | |
|
||||
* (Read output A buffer) (Read output B buffer)
|
||||
*/
|
||||
clEventWrapper E[4];
|
||||
cl_int error = clEnqueueFillBuffer(
|
||||
work_queue, in_mem, &pattern_fill, sizeof(cl_int), 0, data_size(),
|
||||
0, nullptr, (out_of_order_request ? &E[0] : nullptr));
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
error = clEnqueueFillBuffer(work_queue, new_in_mem, &pattern_fill_2,
|
||||
sizeof(cl_int), 0, data_size(), 0, nullptr,
|
||||
(out_of_order_request ? &E[1] : nullptr));
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
|
||||
(out_of_order_request ? &E[0] : nullptr),
|
||||
(out_of_order_request ? &E[2] : nullptr));
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
cl_mutable_dispatch_arg_khr arg_1{ 0, sizeof(new_out_mem),
|
||||
&new_out_mem };
|
||||
cl_mutable_dispatch_arg_khr arg_2{ 1, sizeof(cl_mem), &new_in_mem };
|
||||
cl_mutable_dispatch_arg_khr arg_3{ 2, sizeof(cl_int), &pattern_sec };
|
||||
|
||||
cl_mutable_dispatch_arg_khr args[] = { arg_1, arg_2, arg_3 };
|
||||
cl_mutable_dispatch_config_khr dispatch_config{
|
||||
command,
|
||||
3 /* num_args */,
|
||||
0 /* num_svm_arg */,
|
||||
0 /* num_exec_infos */,
|
||||
0 /* work_dim - 0 means no change to dimensions */,
|
||||
args /* arg_list */,
|
||||
nullptr /* arg_svm_list - nullptr means no change*/,
|
||||
nullptr /* exec_info_list */,
|
||||
nullptr /* global_work_offset */,
|
||||
nullptr /* global_work_size */,
|
||||
nullptr /* local_work_size */
|
||||
};
|
||||
|
||||
cl_uint num_configs = 1;
|
||||
cl_command_buffer_update_type_khr config_types[1] = {
|
||||
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
|
||||
};
|
||||
const void* configs[1] = { &dispatch_config };
|
||||
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
|
||||
config_types, configs);
|
||||
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
|
||||
(out_of_order_request ? &E[1] : nullptr),
|
||||
(out_of_order_request ? &E[3] : nullptr));
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(
|
||||
work_queue, out_mem, CL_FALSE, 0, data_size(),
|
||||
first_enqueue_output.data(), (out_of_order_request ? 1 : 0),
|
||||
(out_of_order_request ? &E[2] : nullptr), nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
error = clEnqueueReadBuffer(
|
||||
work_queue, new_out_mem, CL_FALSE, 0, data_size(),
|
||||
second_enqueue_output.data(), (out_of_order_request ? 1 : 0),
|
||||
(out_of_order_request ? &E[3] : nullptr), nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int VerifySerializedPass(std::vector<cl_int> &first_enqueue_output,
|
||||
std::vector<cl_int> &second_enqueue_output)
|
||||
{
|
||||
const cl_int first_enqueue_ref = pattern_pri * pattern_fill;
|
||||
const cl_int second_enqueue_ref = pattern_sec * first_enqueue_ref;
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(first_enqueue_ref, first_enqueue_output[i],
|
||||
i);
|
||||
CHECK_VERIFICATION_ERROR(second_enqueue_ref,
|
||||
second_enqueue_output[i], i);
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int VerifySimultaneousPass(std::vector<cl_int> &first_enqueue_output,
|
||||
std::vector<cl_int> &second_enqueue_output)
|
||||
{
|
||||
const cl_int first_enqueue_ref = pattern_pri * pattern_fill;
|
||||
const cl_int second_enqueue_ref = pattern_sec * pattern_fill_2;
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(first_enqueue_ref, first_enqueue_output[i],
|
||||
i);
|
||||
CHECK_VERIFICATION_ERROR(second_enqueue_ref,
|
||||
second_enqueue_output[i], i);
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_int error = RecordCommandBuffer();
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
if (simultaneous_use_support)
|
||||
std::vector<cl_int> first_enqueue_output(num_elements);
|
||||
std::vector<cl_int> second_enqueue_output(num_elements);
|
||||
|
||||
if (simultaneous_use_requested)
|
||||
{
|
||||
// enqueue simultaneous command-buffers with out-of-order calls
|
||||
error = RunSimultaneous();
|
||||
test_error(error, "RunSimultaneous failed");
|
||||
error = RunSimultaneousPass(first_enqueue_output,
|
||||
second_enqueue_output);
|
||||
test_error(error, "RunSimultaneousPass failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
// enqueue single command-buffer with out-of-order calls
|
||||
error = RunSingle();
|
||||
test_error(error, "RunSingle failed");
|
||||
error =
|
||||
RunSerializedPass(first_enqueue_output, second_enqueue_output);
|
||||
test_error(error, "RunSerializedPass failed");
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int RecordCommandBuffer()
|
||||
{
|
||||
cl_sync_point_khr sync_points[2];
|
||||
const cl_int pattern = pattern_pri;
|
||||
cl_int error = clCommandFillBufferKHR(
|
||||
work_command_buffer, nullptr, nullptr, in_mem, &pattern,
|
||||
sizeof(cl_int), 0, data_size(), 0, nullptr, &sync_points[0],
|
||||
nullptr);
|
||||
test_error(error, "clCommandFillBufferKHR failed");
|
||||
|
||||
error = clCommandFillBufferKHR(work_command_buffer, nullptr, nullptr,
|
||||
out_mem, &overwritten_pattern,
|
||||
sizeof(cl_int), 0, data_size(), 0,
|
||||
nullptr, &sync_points[1], nullptr);
|
||||
test_error(error, "clCommandFillBufferKHR failed");
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
work_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
|
||||
&num_elements, nullptr, 2, sync_points, nullptr, &command);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(work_command_buffer);
|
||||
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int RunSingle()
|
||||
{
|
||||
cl_int error;
|
||||
|
||||
error = RecordCommandBuffer();
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 0,
|
||||
nullptr, &single_event);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
std::vector<cl_int> output_data(num_elements);
|
||||
error =
|
||||
clEnqueueReadBuffer(work_queue, out_mem, CL_TRUE, 0, data_size(),
|
||||
output_data.data(), 1, &single_event, nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
|
||||
}
|
||||
|
||||
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
sizeof(cl_int) * num_elements
|
||||
* buffer_size_multiplier,
|
||||
nullptr, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
|
||||
&new_out_mem };
|
||||
cl_mutable_dispatch_arg_khr args[] = { arg_1 };
|
||||
|
||||
cl_mutable_dispatch_config_khr dispatch_config{
|
||||
command,
|
||||
1 /* num_args */,
|
||||
0 /* num_svm_arg */,
|
||||
0 /* num_exec_infos */,
|
||||
0 /* work_dim - 0 means no change to dimensions */,
|
||||
args /* arg_list */,
|
||||
nullptr /* arg_svm_list - nullptr means no change*/,
|
||||
nullptr /* exec_info_list */,
|
||||
nullptr /* global_work_offset */,
|
||||
nullptr /* global_work_size */,
|
||||
nullptr /* local_work_size */
|
||||
};
|
||||
|
||||
cl_uint num_configs = 1;
|
||||
cl_command_buffer_update_type_khr config_types[1] = {
|
||||
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
|
||||
};
|
||||
const void* configs[1] = { &dispatch_config };
|
||||
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
|
||||
config_types, configs);
|
||||
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 0,
|
||||
nullptr, &single_event);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_TRUE, 0,
|
||||
data_size(), output_data.data(), 1,
|
||||
&single_event, nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int RecordSimultaneousCommandBuffer()
|
||||
{
|
||||
cl_sync_point_khr sync_points[2];
|
||||
// for both simultaneous passes this call will fill entire in_mem buffer
|
||||
cl_int error = clCommandFillBufferKHR(
|
||||
work_command_buffer, nullptr, nullptr, in_mem, &pattern_pri,
|
||||
sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
|
||||
&sync_points[0], nullptr);
|
||||
test_error(error, "clCommandFillBufferKHR failed");
|
||||
|
||||
// to avoid overwriting the entire result buffer instead of filling
|
||||
// only relevant part this additional kernel was introduced
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
work_command_buffer, nullptr, nullptr, kernel_fill, 1, nullptr,
|
||||
&num_elements, nullptr, 0, nullptr, &sync_points[1], &command);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
work_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
|
||||
&num_elements, nullptr, 2, sync_points, nullptr, &command);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(work_command_buffer);
|
||||
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
struct SimulPassData
|
||||
{
|
||||
cl_int offset;
|
||||
std::vector<cl_int> output_buffer;
|
||||
std::vector<cl_int> updated_output_buffer;
|
||||
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
|
||||
clEventWrapper wait_events[3];
|
||||
};
|
||||
|
||||
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
if (!user_event)
|
||||
{
|
||||
user_event = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
}
|
||||
|
||||
pd.wait_events[0] = user_event;
|
||||
|
||||
// filling offset buffer must wait for previous pass completeness
|
||||
error = clEnqueueFillBuffer(
|
||||
work_queue, off_mem, &pd.offset, sizeof(cl_int), 0, sizeof(cl_int),
|
||||
(wait_pass_event != nullptr ? 1 : 0),
|
||||
(wait_pass_event != nullptr ? &wait_pass_event : nullptr),
|
||||
&pd.wait_events[1]);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
// command buffer execution must wait for two wait-events
|
||||
error =
|
||||
clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 2,
|
||||
&pd.wait_events[0], &pd.wait_events[2]);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(work_queue, out_mem, CL_FALSE,
|
||||
pd.offset * sizeof(cl_int), data_size(),
|
||||
pd.output_buffer.data(), 1,
|
||||
&pd.wait_events[2], nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
sizeof(cl_int) * num_elements
|
||||
* buffer_size_multiplier,
|
||||
nullptr, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
// Retain new output memory object until the end of the test.
|
||||
retained_output_buffers.push_back(new_out_mem);
|
||||
|
||||
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
|
||||
&new_out_mem };
|
||||
cl_mutable_dispatch_arg_khr args[] = { arg_1 };
|
||||
|
||||
cl_mutable_dispatch_config_khr dispatch_config{
|
||||
command,
|
||||
1 /* num_args */,
|
||||
0 /* num_svm_arg */,
|
||||
0 /* num_exec_infos */,
|
||||
0 /* work_dim - 0 means no change to dimensions */,
|
||||
args /* arg_list */,
|
||||
nullptr /* arg_svm_list - nullptr means no change*/,
|
||||
nullptr /* exec_info_list */,
|
||||
nullptr /* global_work_offset */,
|
||||
nullptr /* global_work_size */,
|
||||
nullptr /* local_work_size */
|
||||
};
|
||||
|
||||
cl_uint num_configs = 1;
|
||||
cl_command_buffer_update_type_khr config_types[1] = {
|
||||
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
|
||||
};
|
||||
const void* configs[1] = { &dispatch_config };
|
||||
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
|
||||
config_types, configs);
|
||||
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||
|
||||
// command buffer execution must wait for two wait-events
|
||||
error =
|
||||
clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 2,
|
||||
&pd.wait_events[0], &pd.wait_events[2]);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE,
|
||||
pd.offset * sizeof(cl_int), data_size(),
|
||||
pd.updated_output_buffer.data(), 1,
|
||||
&pd.wait_events[2], nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int RunSimultaneous()
|
||||
{
|
||||
cl_int error = RecordSimultaneousCommandBuffer();
|
||||
test_error(error, "RecordSimultaneousCommandBuffer failed");
|
||||
|
||||
cl_int offset = static_cast<cl_int>(num_elements);
|
||||
|
||||
std::vector<SimulPassData> simul_passes = {
|
||||
{ 0, std::vector<cl_int>(num_elements),
|
||||
std::vector<cl_int>(num_elements) },
|
||||
{ offset, std::vector<cl_int>(num_elements),
|
||||
std::vector<cl_int>(num_elements) }
|
||||
};
|
||||
|
||||
for (auto&& pass : simul_passes)
|
||||
{
|
||||
error = EnqueueSimultaneousPass(pass);
|
||||
test_error(error, "EnqueueSimultaneousPass failed");
|
||||
|
||||
wait_pass_event = pass.wait_events[2];
|
||||
}
|
||||
|
||||
error = clSetUserEventStatus(user_event, CL_COMPLETE);
|
||||
test_error(error, "clSetUserEventStatus failed");
|
||||
|
||||
error = clFinish(work_queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
// verify the result buffers
|
||||
auto& first_pass_output = simul_passes[0].output_buffer;
|
||||
auto& first_pass_updated_output = simul_passes[0].updated_output_buffer;
|
||||
auto& second_pass_output = simul_passes[1].output_buffer;
|
||||
auto& second_pass_updated_output =
|
||||
simul_passes[1].updated_output_buffer;
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
if (simultaneous_use_requested)
|
||||
{
|
||||
// First pass:
|
||||
// Before updating, out_mem is copied from in_mem (pattern_pri)
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_output[i], i);
|
||||
// After updating, new_out_mem is copied from in_mem (pattern_pri)
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_updated_output[i],
|
||||
i);
|
||||
// Second pass:
|
||||
// Before updating, out_mem is filled with overwritten_pattern
|
||||
CHECK_VERIFICATION_ERROR(overwritten_pattern, second_pass_output[i],
|
||||
i);
|
||||
// After updating, new_out_mem is copied from in_mem (pattern_pri)
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, second_pass_updated_output[i],
|
||||
i);
|
||||
error = VerifySimultaneousPass(first_enqueue_output,
|
||||
second_enqueue_output);
|
||||
test_error(error, "VerifySimultaneousPass failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
error = VerifySerializedPass(first_enqueue_output,
|
||||
second_enqueue_output);
|
||||
test_error(error, "VerifySerializedPass failed");
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
@@ -440,22 +400,20 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
clCommandQueueWrapper work_queue;
|
||||
clCommandBufferWrapper work_command_buffer;
|
||||
|
||||
clEventWrapper user_event;
|
||||
clEventWrapper single_event;
|
||||
clEventWrapper wait_pass_event;
|
||||
clKernelWrapper kernel_mul;
|
||||
clProgramWrapper program_mul;
|
||||
|
||||
clKernelWrapper kernel_fill;
|
||||
clProgramWrapper program_fill;
|
||||
clMemWrapper new_out_mem, new_in_mem;
|
||||
|
||||
std::vector<clMemWrapper> retained_output_buffers;
|
||||
|
||||
const size_t test_global_work_size = 3 * sizeof(cl_int);
|
||||
const cl_int pattern_pri = 42;
|
||||
const cl_int pattern_sec = 0xACDC;
|
||||
const cl_int pattern_fill = 0xA;
|
||||
const cl_int pattern_fill_2 = -3;
|
||||
|
||||
const cl_int overwritten_pattern = 0xACDC;
|
||||
cl_mutable_command_khr command;
|
||||
};
|
||||
|
||||
template <bool simultaneous_use_request>
|
||||
struct CrossQueueSimultaneousMutableDispatchTest
|
||||
: public BasicMutableCommandBufferTest
|
||||
{
|
||||
@@ -463,9 +421,9 @@ struct CrossQueueSimultaneousMutableDispatchTest
|
||||
cl_context context,
|
||||
cl_command_queue queue)
|
||||
: BasicMutableCommandBufferTest(device, context, queue),
|
||||
queue_sec(nullptr), command(nullptr)
|
||||
queue_sec(nullptr), new_out_mem(nullptr), command(nullptr)
|
||||
{
|
||||
simultaneous_use_requested = true;
|
||||
simultaneous_use_requested = simultaneous_use_request;
|
||||
}
|
||||
|
||||
cl_int SetUpKernel() override
|
||||
@@ -488,6 +446,11 @@ struct CrossQueueSimultaneousMutableDispatchTest
|
||||
kernel = clCreateKernel(program, "fill", &error);
|
||||
test_error(error, "Failed to create copy kernel");
|
||||
|
||||
new_out_mem =
|
||||
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
sizeof(cl_int) * num_elements, nullptr, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -530,24 +493,18 @@ struct CrossQueueSimultaneousMutableDispatchTest
|
||||
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
|
||||
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
|
||||
|
||||
return !simultaneous_use_support || !mutable_support;
|
||||
return (simultaneous_use_requested && !simultaneous_use_support)
|
||||
|| !mutable_support;
|
||||
}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
// record command buffer
|
||||
cl_int pattern = 0;
|
||||
cl_int error = clCommandFillBufferKHR(
|
||||
command_buffer, nullptr, nullptr, out_mem, &pattern, sizeof(cl_int),
|
||||
0, data_size(), 0, nullptr, nullptr, nullptr);
|
||||
test_error(error, "clCommandFillBufferKHR failed");
|
||||
|
||||
cl_command_properties_khr props[] = {
|
||||
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
|
||||
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
|
||||
};
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr, &command);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
@@ -555,16 +512,15 @@ struct CrossQueueSimultaneousMutableDispatchTest
|
||||
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||
|
||||
// enqueue command buffer to default queue
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
// If we are testing not using simultaneous-use then we need to use
|
||||
// an event to serialize the execution order to the command-buffer
|
||||
// submission to each queue.
|
||||
clEventWrapper E;
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, command_buffer, 0, nullptr,
|
||||
(simultaneous_use_requested ? nullptr : &E));
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
// update mutable parameters
|
||||
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
data_size(), nullptr, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
cl_mutable_dispatch_arg_khr arg_0{ 0, sizeof(cl_int), &pattern_sec };
|
||||
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
|
||||
&new_out_mem };
|
||||
@@ -594,30 +550,35 @@ struct CrossQueueSimultaneousMutableDispatchTest
|
||||
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||
|
||||
// enqueue command buffer to non-default queue
|
||||
error = clEnqueueCommandBufferKHR(1, &queue_sec, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
1, &queue_sec, command_buffer, (simultaneous_use_requested ? 0 : 1),
|
||||
(simultaneous_use_requested ? nullptr : &E), nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clFinish(queue_sec);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
// read result of command buffer execution
|
||||
std::vector<cl_int> output_data(num_elements);
|
||||
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
|
||||
output_data.data(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
std::vector<cl_int> sec_output_data(num_elements);
|
||||
error =
|
||||
clEnqueueReadBuffer(queue_sec, new_out_mem, CL_TRUE, 0, data_size(),
|
||||
output_data.data(), 0, nullptr, nullptr);
|
||||
sec_output_data.data(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
// verify the result
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(pattern_sec, output_data[i], i);
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
|
||||
CHECK_VERIFICATION_ERROR(pattern_sec, sec_output_data[i], i);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
clCommandQueueWrapper queue_sec;
|
||||
clMemWrapper new_out_mem;
|
||||
const cl_int pattern_pri = 42;
|
||||
const cl_int pattern_sec = 0xACDC;
|
||||
cl_mutable_command_khr command;
|
||||
@@ -637,14 +598,26 @@ REGISTER_TEST(mutable_dispatch_simultaneous_out_of_order)
|
||||
device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(mutable_dispatch_in_order)
|
||||
{
|
||||
return MakeAndRunTest<SimultaneousMutableDispatchTest<false, false>>(
|
||||
device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(mutable_dispatch_simultaneous_in_order)
|
||||
{
|
||||
return MakeAndRunTest<SimultaneousMutableDispatchTest<true, false>>(
|
||||
device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(mutable_dispatch_simultaneous_cross_queue)
|
||||
REGISTER_TEST(mutable_dispatch_cross_queue)
|
||||
{
|
||||
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest>(
|
||||
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest<false>>(
|
||||
device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(mutable_dispatch_simultaneous_cross_queue)
|
||||
{
|
||||
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest<true>>(
|
||||
device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
@@ -79,11 +79,7 @@ struct CommandBufferEventSync : public BasicCommandBufferTest
|
||||
: BasicCommandBufferTest(device, context, queue),
|
||||
command_buffer_sec(this), kernel_sec(nullptr), in_mem_sec(nullptr),
|
||||
out_mem_sec(nullptr), off_mem_sec(nullptr), test_event(nullptr)
|
||||
{
|
||||
simultaneous_use_requested =
|
||||
(event_mode == EventMode::RET_COMBUF_WAIT_FOR_COMBUF) ? true
|
||||
: false;
|
||||
}
|
||||
{}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUpKernel() override
|
||||
@@ -159,9 +155,6 @@ struct CommandBufferEventSync : public BasicCommandBufferTest
|
||||
{
|
||||
if (BasicCommandBufferTest::Skip()) return true;
|
||||
|
||||
if (simultaneous_use_requested && !simultaneous_use_support)
|
||||
return true;
|
||||
|
||||
if (out_of_order_requested && !out_of_order_support) return true;
|
||||
|
||||
return false;
|
||||
|
||||
@@ -48,6 +48,39 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
|
||||
: BasicCommandBufferTest(device, context, queue)
|
||||
{}
|
||||
|
||||
bool Skip() override
|
||||
{
|
||||
if (BasicCommandBufferTest::Skip()) return true;
|
||||
|
||||
if (test_mode == CombufInfoTestMode::CITM_PROP_ARRAY)
|
||||
{
|
||||
return !simultaneous_use_support
|
||||
|| !(is_extension_available(
|
||||
device,
|
||||
CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
cl_int SetUp(int elements) override
|
||||
{
|
||||
|
||||
cl_int error = BasicCommandBufferTest::SetUp(elements);
|
||||
test_error(error, "BasicCommandBufferTest::SetUp() failed");
|
||||
if (test_mode == CombufInfoTestMode::CITM_PROP_ARRAY)
|
||||
{
|
||||
cl_command_buffer_properties_khr properties[3] = {
|
||||
CL_COMMAND_BUFFER_FLAGS_KHR,
|
||||
CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR, 0
|
||||
};
|
||||
command_buffer =
|
||||
clCreateCommandBufferKHR(1, &queue, properties, &error);
|
||||
test_error(error, "clCreateCommandBufferKHR failed");
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int Run() override
|
||||
{
|
||||
@@ -237,33 +270,6 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
|
||||
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
|
||||
test_error(error, "verify_state failed");
|
||||
|
||||
error = clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
|
||||
data_size(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
clEventWrapper trigger_event = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
|
||||
clEventWrapper execute_event;
|
||||
// enqueued command buffer blocked on user event
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
|
||||
&trigger_event, &execute_event);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
// execute command buffer
|
||||
cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
|
||||
|
||||
test_error(error, "verify_state failed");
|
||||
|
||||
test_error(signal_error, "clSetUserEventStatus failed");
|
||||
|
||||
error = clWaitForEvents(1, &execute_event);
|
||||
test_error(error, "Unable to wait for execute event");
|
||||
|
||||
// verify executable state
|
||||
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
|
||||
test_error(error, "verify_state failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -21,11 +21,9 @@
|
||||
namespace {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// out-of-order tests for cl_khr_command_buffer which handles below cases:
|
||||
// -test case for out-of-order command-buffer
|
||||
// -test an out-of-order command-buffer with simultaneous use
|
||||
// Tests for cl_khr_command_buffer which handles submitting a command-buffer to
|
||||
// an out-of-order queue.
|
||||
|
||||
template <bool simultaneous_request>
|
||||
struct OutOfOrderTest : public BasicCommandBufferTest
|
||||
{
|
||||
OutOfOrderTest(cl_device_id device, cl_context context,
|
||||
@@ -35,18 +33,11 @@ struct OutOfOrderTest : public BasicCommandBufferTest
|
||||
user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr),
|
||||
program_fill(nullptr)
|
||||
{
|
||||
simultaneous_use_requested = simultaneous_request;
|
||||
if (simultaneous_request) buffer_size_multiplier = 2;
|
||||
buffer_size_multiplier = 2; // two enqueues of command-buffer
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUpKernel() override
|
||||
{
|
||||
// if device doesn't support simultaneous use which was requested
|
||||
// we can skip creation of OCL resources
|
||||
if (simultaneous_use_requested && !simultaneous_use_support)
|
||||
return CL_SUCCESS;
|
||||
|
||||
cl_int error = BasicCommandBufferTest::SetUpKernel();
|
||||
test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
|
||||
|
||||
@@ -74,14 +65,8 @@ struct OutOfOrderTest : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUpKernelArgs() override
|
||||
{
|
||||
// if device doesn't support simultaneous use which was requested
|
||||
// we can skip creation of OCL resources
|
||||
if (simultaneous_use_requested && !simultaneous_use_support)
|
||||
return CL_SUCCESS;
|
||||
|
||||
cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
|
||||
test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed");
|
||||
|
||||
@@ -98,7 +83,6 @@ struct OutOfOrderTest : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUp(int elements) override
|
||||
{
|
||||
cl_int error = BasicCommandBufferTest::SetUp(elements);
|
||||
@@ -108,110 +92,23 @@ struct OutOfOrderTest : public BasicCommandBufferTest
|
||||
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
|
||||
test_error(error, "Unable to create command queue to test with");
|
||||
|
||||
cl_command_buffer_properties_khr properties[3] = {
|
||||
CL_COMMAND_BUFFER_FLAGS_KHR, 0, 0
|
||||
};
|
||||
|
||||
if (simultaneous_use_requested && simultaneous_use_support)
|
||||
properties[1] = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
|
||||
|
||||
out_of_order_command_buffer = clCreateCommandBufferKHR(
|
||||
1, &out_of_order_queue, properties, &error);
|
||||
out_of_order_command_buffer =
|
||||
clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
|
||||
test_error(error, "clCreateCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
bool Skip() override
|
||||
{
|
||||
if (BasicCommandBufferTest::Skip()) return true;
|
||||
|
||||
if (!out_of_order_support
|
||||
|| (simultaneous_use_requested && !simultaneous_use_support))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
return !out_of_order_support;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
if (simultaneous_use_support)
|
||||
{
|
||||
// enqueue simultaneous command-buffers with out-of-order calls
|
||||
error = RunSimultaneous();
|
||||
test_error(error, "RunSimultaneous failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
// enqueue single command-buffer with out-of-order calls
|
||||
error = RunSingle();
|
||||
test_error(error, "RunSingle failed");
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RecordCommandBuffer()
|
||||
cl_int RecordCommandBuffer() const
|
||||
{
|
||||
cl_sync_point_khr sync_points[2];
|
||||
const cl_int pattern = pattern_pri;
|
||||
cl_int error = clCommandFillBufferKHR(
|
||||
out_of_order_command_buffer, nullptr, nullptr, in_mem, &pattern,
|
||||
sizeof(cl_int), 0, data_size(), 0, nullptr, &sync_points[0],
|
||||
nullptr);
|
||||
test_error(error, "clCommandFillBufferKHR failed");
|
||||
|
||||
error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
|
||||
nullptr, out_mem, &overwritten_pattern,
|
||||
sizeof(cl_int), 0, data_size(), 0,
|
||||
nullptr, &sync_points[1], nullptr);
|
||||
test_error(error, "clCommandFillBufferKHR failed");
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
|
||||
&num_elements, nullptr, 2, sync_points, nullptr, nullptr);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
|
||||
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RunSingle()
|
||||
{
|
||||
cl_int error = RecordCommandBuffer();
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, out_of_order_command_buffer, 0, nullptr, &user_event);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
std::vector<cl_int> output_data(num_elements);
|
||||
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
|
||||
data_size(), output_data.data(), 1,
|
||||
&user_event, nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RecordSimultaneousCommandBuffer() const
|
||||
{
|
||||
cl_sync_point_khr sync_points[2];
|
||||
// for both simultaneous passes this call will fill entire in_mem buffer
|
||||
// fill entire in_mem buffer
|
||||
cl_int error = clCommandFillBufferKHR(
|
||||
out_of_order_command_buffer, nullptr, nullptr, in_mem, &pattern_pri,
|
||||
sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
|
||||
@@ -236,79 +133,63 @@ struct OutOfOrderTest : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
struct SimulPassData
|
||||
struct EnqueuePassData
|
||||
{
|
||||
cl_int offset;
|
||||
std::vector<cl_int> output_buffer;
|
||||
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
|
||||
clEventWrapper wait_events[3];
|
||||
// 0: offset-buffer fill event, 2:kernel done event
|
||||
clEventWrapper wait_events[2];
|
||||
};
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
|
||||
cl_int EnqueuePass(EnqueuePassData& pd)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
if (!user_event)
|
||||
{
|
||||
user_event = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
}
|
||||
|
||||
pd.wait_events[0] = user_event;
|
||||
|
||||
// filling offset buffer must wait for previous pass completeness
|
||||
error = clEnqueueFillBuffer(
|
||||
cl_int error = clEnqueueFillBuffer(
|
||||
out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0,
|
||||
sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0),
|
||||
(wait_pass_event != nullptr ? &wait_pass_event : nullptr),
|
||||
&pd.wait_events[1]);
|
||||
&pd.wait_events[0]);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
// command buffer execution must wait for two wait-events
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0],
|
||||
&pd.wait_events[2]);
|
||||
0, nullptr, out_of_order_command_buffer, 1, &pd.wait_events[0],
|
||||
&pd.wait_events[1]);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE,
|
||||
pd.offset * sizeof(cl_int), data_size(),
|
||||
pd.output_buffer.data(), 1,
|
||||
&pd.wait_events[2], nullptr);
|
||||
&pd.wait_events[1], nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RunSimultaneous()
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = RecordSimultaneousCommandBuffer();
|
||||
test_error(error, "RecordSimultaneousCommandBuffer failed");
|
||||
cl_int error = RecordCommandBuffer();
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
cl_int offset = static_cast<cl_int>(num_elements);
|
||||
|
||||
std::vector<SimulPassData> simul_passes = {
|
||||
std::vector<EnqueuePassData> enqueue_passes = {
|
||||
{ 0, std::vector<cl_int>(num_elements) },
|
||||
{ offset, std::vector<cl_int>(num_elements) }
|
||||
};
|
||||
|
||||
for (auto&& pass : simul_passes)
|
||||
for (auto&& pass : enqueue_passes)
|
||||
{
|
||||
error = EnqueueSimultaneousPass(pass);
|
||||
test_error(error, "EnqueueSimultaneousPass failed");
|
||||
error = EnqueuePass(pass);
|
||||
test_error(error, "EnqueuePass failed");
|
||||
|
||||
wait_pass_event = pass.wait_events[2];
|
||||
wait_pass_event = pass.wait_events[1];
|
||||
}
|
||||
|
||||
error = clSetUserEventStatus(user_event, CL_COMPLETE);
|
||||
test_error(error, "clSetUserEventStatus failed");
|
||||
|
||||
error = clFinish(out_of_order_queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
// verify the result buffers
|
||||
for (auto&& pass : simul_passes)
|
||||
for (auto&& pass : enqueue_passes)
|
||||
{
|
||||
auto& res_data = pass.output_buffer;
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
@@ -320,7 +201,6 @@ struct OutOfOrderTest : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
clCommandQueueWrapper out_of_order_queue;
|
||||
clCommandBufferWrapper out_of_order_command_buffer;
|
||||
|
||||
@@ -338,12 +218,5 @@ struct OutOfOrderTest : public BasicCommandBufferTest
|
||||
|
||||
REGISTER_TEST(out_of_order)
|
||||
{
|
||||
return MakeAndRunTest<OutOfOrderTest<false>>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(simultaneous_out_of_order)
|
||||
{
|
||||
return MakeAndRunTest<OutOfOrderTest<true>>(device, context, queue,
|
||||
num_elements);
|
||||
return MakeAndRunTest<OutOfOrderTest>(device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,321 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "basic_command_buffer.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Tests for multiple sequential submissions of a command-buffer without a
|
||||
// blocking wait between them, but using the following mechanisms to serialize
|
||||
// execution of the submissions.
|
||||
// * In-order queue dependencies
|
||||
// * Event dependencies in command-buffer submissions to an out-of-order queue
|
||||
// * Barrier submissions between command-buffer submissions to an out-of-order
|
||||
// queue
|
||||
|
||||
// Base class that individual test fixtures are derived from
|
||||
struct CommandBufferPipelined : public BasicCommandBufferTest
|
||||
{
|
||||
CommandBufferPipelined(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: BasicCommandBufferTest(device, context, queue)
|
||||
{}
|
||||
|
||||
cl_int SetUpKernel() override
|
||||
{
|
||||
const char* mul_kernel_str =
|
||||
R"(
|
||||
__kernel void mul_by_val(int in, __global int* data)
|
||||
{
|
||||
size_t id = get_global_id(0);
|
||||
data[id] *= in;
|
||||
}
|
||||
|
||||
__kernel void increment(__global int* data)
|
||||
{
|
||||
size_t id = get_global_id(0);
|
||||
data[id]++;
|
||||
})";
|
||||
|
||||
cl_int error = create_single_kernel_helper_create_program(
|
||||
context, &program, 1, &mul_kernel_str);
|
||||
test_error(error, "Failed to create program with source");
|
||||
|
||||
error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
|
||||
test_error(error, "Failed to build program");
|
||||
|
||||
mul_kernel = clCreateKernel(program, "mul_by_val", &error);
|
||||
test_error(error, "Failed to create mul_by_val kernel");
|
||||
|
||||
inc_kernel = clCreateKernel(program, "increment", &error);
|
||||
test_error(error, "Failed to create increment kernel");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int SetUpKernelArgs() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
num_elements * buffer_size_multiplier
|
||||
* sizeof(cl_int),
|
||||
nullptr, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
cl_int val_arg = pattern;
|
||||
error = clSetKernelArg(mul_kernel, 0, sizeof(cl_int), &val_arg);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelArg(mul_kernel, 1, sizeof(out_mem), &out_mem);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelArg(inc_kernel, 0, sizeof(out_mem), &out_mem);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int RecordCommandBuffer(clCommandBufferWrapper& cmd_buf)
|
||||
{
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
cmd_buf, nullptr, nullptr, inc_kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr, nullptr);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(cmd_buf);
|
||||
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||
|
||||
// Zero initialize buffer before starting test
|
||||
cl_int zero_pattern = 0;
|
||||
error =
|
||||
clEnqueueFillBuffer(queue, out_mem, &zero_pattern, sizeof(cl_int),
|
||||
0, data_size(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
const cl_int pattern = 42;
|
||||
|
||||
clKernelWrapper inc_kernel = nullptr;
|
||||
clKernelWrapper mul_kernel = nullptr;
|
||||
};
|
||||
|
||||
struct InOrderPipelined : public CommandBufferPipelined
|
||||
{
|
||||
InOrderPipelined(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: CommandBufferPipelined(device, context, queue)
|
||||
{}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = RecordCommandBuffer(command_buffer);
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error =
|
||||
clEnqueueNDRangeKernel(queue, mul_kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
std::vector<cl_int> output_data(num_elements);
|
||||
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
|
||||
output_data.data(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
// Verify
|
||||
const cl_int ref = pattern + 1;
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
struct EventPipelined : public CommandBufferPipelined
|
||||
{
|
||||
EventPipelined(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: CommandBufferPipelined(device, context, queue),
|
||||
out_of_order_queue(nullptr), out_of_order_command_buffer(this)
|
||||
{}
|
||||
|
||||
bool Skip() override
|
||||
{
|
||||
return CommandBufferPipelined::Skip() || !out_of_order_support;
|
||||
}
|
||||
|
||||
cl_int SetUp(int elements) override
|
||||
{
|
||||
cl_int error = CommandBufferPipelined::SetUp(elements);
|
||||
test_error(error, "EventPipelined::SetUp failed");
|
||||
|
||||
out_of_order_queue = clCreateCommandQueue(
|
||||
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
|
||||
test_error(error, "Unable to create command queue to test with");
|
||||
|
||||
out_of_order_command_buffer =
|
||||
clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
|
||||
test_error(error, "clCreateCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = RecordCommandBuffer(out_of_order_command_buffer);
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, out_of_order_command_buffer, 0, nullptr, &events[0]);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueNDRangeKernel(out_of_order_queue, mul_kernel, 1,
|
||||
nullptr, &num_elements, nullptr, 1,
|
||||
&events[0], &events[1]);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, out_of_order_command_buffer, 1, &events[1], &events[2]);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
std::vector<cl_int> output_data(num_elements);
|
||||
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
|
||||
data_size(), output_data.data(), 1,
|
||||
&events[2], nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
// Verify
|
||||
const cl_int ref = pattern + 1;
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
clCommandQueueWrapper out_of_order_queue;
|
||||
clCommandBufferWrapper out_of_order_command_buffer;
|
||||
clEventWrapper events[3] = { nullptr, nullptr, nullptr };
|
||||
};
|
||||
|
||||
struct BarrierPipelined : public CommandBufferPipelined
|
||||
{
|
||||
BarrierPipelined(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: CommandBufferPipelined(device, context, queue),
|
||||
out_of_order_queue(nullptr), out_of_order_command_buffer(this)
|
||||
{}
|
||||
|
||||
bool Skip() override
|
||||
{
|
||||
return CommandBufferPipelined::Skip() || !out_of_order_support;
|
||||
}
|
||||
|
||||
cl_int SetUp(int elements) override
|
||||
{
|
||||
cl_int error = CommandBufferPipelined::SetUp(elements);
|
||||
test_error(error, "EventPipelined::SetUp failed");
|
||||
|
||||
out_of_order_queue = clCreateCommandQueue(
|
||||
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
|
||||
test_error(error, "Unable to create command queue to test with");
|
||||
|
||||
out_of_order_command_buffer =
|
||||
clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
|
||||
test_error(error, "clCreateCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = RecordCommandBuffer(out_of_order_command_buffer);
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, out_of_order_command_buffer, 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueBarrier(out_of_order_queue);
|
||||
test_error(error, "clEnqueueBarrier failed");
|
||||
|
||||
error =
|
||||
clEnqueueNDRangeKernel(out_of_order_queue, mul_kernel, 1, nullptr,
|
||||
&num_elements, nullptr, 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clEnqueueBarrier(out_of_order_queue);
|
||||
test_error(error, "clEnqueueBarrier failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(
|
||||
0, nullptr, out_of_order_command_buffer, 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueBarrier(out_of_order_queue);
|
||||
test_error(error, "clEnqueueBarrier failed");
|
||||
|
||||
std::vector<cl_int> output_data(num_elements);
|
||||
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
|
||||
data_size(), output_data.data(), 0, nullptr,
|
||||
nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
// Verify
|
||||
const cl_int ref = pattern + 1;
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
clCommandQueueWrapper out_of_order_queue;
|
||||
clCommandBufferWrapper out_of_order_command_buffer;
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
REGISTER_TEST(pipeline_in_order_deps)
|
||||
{
|
||||
return MakeAndRunTest<InOrderPipelined>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(pipeline_event_deps)
|
||||
{
|
||||
return MakeAndRunTest<EventPipelined>(device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(pipeline_barrier_deps)
|
||||
{
|
||||
return MakeAndRunTest<BarrierPipelined>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
@@ -44,27 +44,18 @@
|
||||
namespace {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// printf tests for cl_khr_command_buffer which handles below cases:
|
||||
// -test cases for device side printf
|
||||
// -test cases for device side printf with a simultaneous use command-buffer
|
||||
// Test for cl_khr_command_buffer which handles a command-buffer containing a
|
||||
// printf kernel being repeatedly enqueued.
|
||||
|
||||
template <bool simul_use>
|
||||
struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
{
|
||||
CommandBufferPrintfTest(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: BasicCommandBufferTest(device, context, queue),
|
||||
trigger_event(nullptr), wait_event(nullptr), file_descriptor(0),
|
||||
printf_use_support(false)
|
||||
: BasicCommandBufferTest(device, context, queue), file_descriptor(0)
|
||||
{
|
||||
simultaneous_use_requested = simul_use;
|
||||
if (simul_use)
|
||||
{
|
||||
buffer_size_multiplier = num_test_iters;
|
||||
}
|
||||
buffer_size_multiplier = num_test_iters;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
void ReleaseOutputStream(int fd)
|
||||
{
|
||||
fflush(stdout);
|
||||
@@ -72,7 +63,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
close(fd);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
int AcquireOutputStream(int* error)
|
||||
{
|
||||
int fd = streamDup(fileno(stdout));
|
||||
@@ -85,7 +75,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
return fd;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
void GetAnalysisBuffer(std::stringstream& buffer)
|
||||
{
|
||||
std::ifstream fp(temp_filename, std::ios::in);
|
||||
@@ -95,7 +84,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
}
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
void PurgeTempFile()
|
||||
{
|
||||
std::ofstream ofs(temp_filename,
|
||||
@@ -103,9 +91,10 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
bool Skip() override
|
||||
{
|
||||
if (BasicCommandBufferTest::Skip()) return true;
|
||||
|
||||
// Query if device supports kernel printf use
|
||||
cl_device_command_buffer_capabilities_khr capabilities;
|
||||
cl_int error =
|
||||
@@ -114,16 +103,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
test_error(error,
|
||||
"Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
|
||||
|
||||
printf_use_support =
|
||||
const bool printf_use_support =
|
||||
(capabilities & CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR)
|
||||
!= 0;
|
||||
|
||||
if (!printf_use_support) return true;
|
||||
return BasicCommandBufferTest::Skip()
|
||||
|| (simultaneous_use_requested && !simultaneous_use_support);
|
||||
return !printf_use_support;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUpKernel() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
@@ -153,14 +139,12 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
size_t data_size() const override
|
||||
{
|
||||
return sizeof(cl_char) * num_elements * buffer_size_multiplier
|
||||
* max_pattern_length;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUpKernelArgs() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
@@ -192,7 +176,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUp(int elements) override
|
||||
{
|
||||
auto pcFname = get_temp_filename();
|
||||
@@ -209,39 +192,10 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
return BasicCommandBufferTest::SetUp(elements);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
// record command buffer with primary queue
|
||||
error = RecordCommandBuffer();
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
if (simultaneous_use_support)
|
||||
{
|
||||
// enqueue simultaneous command-buffers with printf calls
|
||||
error = RunSimultaneous();
|
||||
test_error(error, "RunSimultaneous failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
// enqueue single command-buffer with printf calls
|
||||
error = RunSingle();
|
||||
test_error(error, "RunSingle failed");
|
||||
}
|
||||
|
||||
std::remove(temp_filename.c_str());
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RecordCommandBuffer()
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr, nullptr);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
@@ -251,7 +205,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
#define test_error_release_stdout(errCode, msg) \
|
||||
{ \
|
||||
auto errCodeResult = errCode; \
|
||||
@@ -263,96 +216,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
} \
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int EnqueueSinglePass(const std::vector<cl_char>& pattern,
|
||||
std::vector<cl_char>& output_data)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
auto in_mem_size = sizeof(cl_char) * pattern.size();
|
||||
error = clEnqueueWriteBuffer(queue, in_mem, CL_TRUE, 0, in_mem_size,
|
||||
&pattern[0], 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
test_assert_error(pattern.size() - 1 <= CL_UINT_MAX,
|
||||
"pattern.size() - 1 does not fit in a cl_uint");
|
||||
cl_uint offset[] = { 0, static_cast<cl_uint>(pattern.size() - 1) };
|
||||
error = clEnqueueWriteBuffer(queue, off_mem, CL_TRUE, 0, sizeof(offset),
|
||||
offset, 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
// redirect output stream to temporary file
|
||||
file_descriptor = AcquireOutputStream(&error);
|
||||
if (error != 0)
|
||||
{
|
||||
log_error("Error while redirection stdout to file");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
// enqueue command buffer with kernel containing printf command
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, &wait_event);
|
||||
test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
fflush(stdout);
|
||||
|
||||
// Wait until kernel finishes its execution and (thus) the output
|
||||
// printed from the kernel is immediately printed
|
||||
error = clWaitForEvents(1, &wait_event);
|
||||
test_error(error, "clWaitForEvents failed");
|
||||
|
||||
// output buffer contains pattern to be compared with printout
|
||||
error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
|
||||
output_data.data(), 0, nullptr, nullptr);
|
||||
test_error_release_stdout(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error_release_stdout(error, "clFinish failed");
|
||||
|
||||
ReleaseOutputStream(file_descriptor);
|
||||
|
||||
// copy content of temporary file into string stream
|
||||
std::stringstream sstr;
|
||||
GetAnalysisBuffer(sstr);
|
||||
if (sstr.str().size() != num_elements * offset[1])
|
||||
{
|
||||
log_error("GetAnalysisBuffer failed\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
// verify the result - compare printout and output buffer
|
||||
for (size_t i = 0; i < num_elements * offset[1]; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(sstr.str().at(i), output_data[i], i);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RunSingle()
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
std::vector<cl_char> output_data(num_elements * max_pattern_length);
|
||||
|
||||
for (unsigned i = 0; i < num_test_iters; i++)
|
||||
{
|
||||
unsigned pattern_length =
|
||||
std::max(min_pattern_length, rand() % max_pattern_length);
|
||||
char pattern_character = 'a' + rand() % 26;
|
||||
std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
|
||||
pattern[pattern_length] = '\0';
|
||||
error = EnqueueSinglePass(pattern, output_data);
|
||||
test_error(error, "EnqueueSinglePass failed");
|
||||
|
||||
output_data.assign(output_data.size(), 0);
|
||||
PurgeTempFile();
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
struct SimulPassData
|
||||
struct EnqueuePassData
|
||||
{
|
||||
// null terminated character buffer
|
||||
std::vector<cl_char> pattern;
|
||||
@@ -361,8 +225,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
std::vector<cl_char> output_buffer;
|
||||
};
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
|
||||
cl_int EnqueuePass(EnqueuePassData& pd)
|
||||
{
|
||||
// write current pattern to device memory
|
||||
auto in_mem_size = sizeof(cl_char) * pd.pattern.size();
|
||||
@@ -377,15 +240,8 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
pd.offset, 0, nullptr, nullptr);
|
||||
test_error_release_stdout(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
// create user event to block simultaneous command buffers
|
||||
if (!trigger_event)
|
||||
{
|
||||
trigger_event = clCreateUserEvent(context, &error);
|
||||
test_error_release_stdout(error, "clCreateUserEvent failed");
|
||||
}
|
||||
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
|
||||
&trigger_event, nullptr);
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
// output buffer contains pattern to be compared with printout
|
||||
@@ -398,14 +254,14 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RunSimultaneous()
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_int error = RecordCommandBuffer();
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
cl_int offset = static_cast<cl_int>(num_elements * max_pattern_length);
|
||||
|
||||
std::vector<SimulPassData> simul_passes(num_test_iters);
|
||||
std::vector<EnqueuePassData> enqueue_passes(num_test_iters);
|
||||
|
||||
const int pattern_chars_range = 26;
|
||||
std::list<cl_char> pattern_chars;
|
||||
@@ -413,7 +269,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
pattern_chars.push_back(cl_char('a' + i));
|
||||
|
||||
test_assert_error(pattern_chars.size() >= num_test_iters,
|
||||
"Number of simultaneous launches must be lower than "
|
||||
"Number of launches must be lower than "
|
||||
"size of characters container");
|
||||
|
||||
cl_int total_pattern_coverage = 0;
|
||||
@@ -428,11 +284,12 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
|
||||
std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
|
||||
pattern.back() = '\0';
|
||||
simul_passes[i] = { pattern,
|
||||
{ cl_int(i * offset), cl_int(pattern_length) },
|
||||
std::vector<cl_char>(num_elements
|
||||
* pattern_length) };
|
||||
total_pattern_coverage += simul_passes[i].output_buffer.size();
|
||||
enqueue_passes[i] = {
|
||||
pattern,
|
||||
{ cl_int(i * offset), cl_int(pattern_length) },
|
||||
std::vector<cl_char>(num_elements * pattern_length)
|
||||
};
|
||||
total_pattern_coverage += enqueue_passes[i].output_buffer.size();
|
||||
pattern_chars.erase(it);
|
||||
};
|
||||
|
||||
@@ -444,17 +301,14 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
// enqueue read/write and command buffer operations
|
||||
for (auto&& pass : simul_passes)
|
||||
// enqueue read/write and command buffer operations, serialized
|
||||
// by in-order queue
|
||||
for (auto&& pass : enqueue_passes)
|
||||
{
|
||||
error = EnqueueSimultaneousPass(pass);
|
||||
test_error_release_stdout(error, "EnqueueSimultaneousPass failed");
|
||||
error = EnqueuePass(pass);
|
||||
test_error_release_stdout(error, "EnqueuePass failed");
|
||||
}
|
||||
|
||||
// execute command buffers
|
||||
error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
|
||||
test_error_release_stdout(error, "clSetUserEventStatus failed");
|
||||
|
||||
// flush streams
|
||||
fflush(stdout);
|
||||
|
||||
@@ -477,13 +331,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
for (int i = 0; i < total_pattern_coverage; i++)
|
||||
counters_map[sstr.str().at(i)]++;
|
||||
|
||||
if (counters_map.size() != simul_passes.size())
|
||||
if (counters_map.size() != enqueue_passes.size())
|
||||
{
|
||||
log_error("printout inconsistent with input data\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
for (auto&& pass : simul_passes)
|
||||
for (auto&& pass : enqueue_passes)
|
||||
{
|
||||
auto& res_data = pass.output_buffer;
|
||||
|
||||
@@ -501,18 +355,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
}
|
||||
}
|
||||
|
||||
std::remove(temp_filename.c_str());
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
clEventWrapper trigger_event = nullptr;
|
||||
clEventWrapper wait_event = nullptr;
|
||||
|
||||
std::string temp_filename;
|
||||
int file_descriptor;
|
||||
|
||||
bool printf_use_support;
|
||||
|
||||
// specifies max test length for printf pattern
|
||||
const unsigned max_pattern_length = 6;
|
||||
// specifies min test length for printf pattern
|
||||
@@ -523,14 +372,8 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
REGISTER_TEST(basic_printf)
|
||||
REGISTER_TEST(printf)
|
||||
{
|
||||
return MakeAndRunTest<CommandBufferPrintfTest<false>>(device, context,
|
||||
queue, num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(simultaneous_printf)
|
||||
{
|
||||
return MakeAndRunTest<CommandBufferPrintfTest<true>>(device, context, queue,
|
||||
num_elements);
|
||||
return MakeAndRunTest<CommandBufferPrintfTest>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
@@ -86,21 +86,17 @@ cl_int VerifyResult(const clEventWrapper& event)
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Command-buffer profiling test cases:
|
||||
// -all commands are recorded to a single command-queue
|
||||
// -profiling a command-buffer with simultaneous use
|
||||
template <bool simultaneous_request>
|
||||
// Command-buffer profiling test for enqueuing command-buffer twice and checking
|
||||
// the profiling counters on the events returned.
|
||||
struct CommandBufferProfiling : public BasicCommandBufferTest
|
||||
{
|
||||
CommandBufferProfiling(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: BasicCommandBufferTest(device, context, queue), wait_event(nullptr)
|
||||
: BasicCommandBufferTest(device, context, queue)
|
||||
{
|
||||
simultaneous_use_requested = simultaneous_request;
|
||||
if (simultaneous_request) buffer_size_multiplier = 2;
|
||||
buffer_size_multiplier = 2; // Do two enqueues of command-buffer
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
bool Skip() override
|
||||
{
|
||||
if (BasicCommandBufferTest::Skip()) return true;
|
||||
@@ -127,10 +123,9 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
|
||||
"Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
|
||||
return true;
|
||||
}
|
||||
return (simultaneous_use_requested && !simultaneous_use_support);
|
||||
return false;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUp(int elements) override
|
||||
{
|
||||
|
||||
@@ -156,37 +151,45 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
|
||||
return BasicCommandBufferTest::SetUp(elements);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
struct EnqueuePassData
|
||||
{
|
||||
cl_int offset;
|
||||
clEventWrapper query_event;
|
||||
};
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
// record command buffer
|
||||
error = RecordCommandBuffer();
|
||||
cl_int error = RecordCommandBuffer();
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
if (simultaneous_use_requested)
|
||||
cl_int offset = static_cast<cl_int>(num_elements);
|
||||
|
||||
std::vector<EnqueuePassData> enqueue_passes = {
|
||||
{ 0, clEventWrapper() }, { offset, clEventWrapper() }
|
||||
};
|
||||
|
||||
// In-order queue serialized the command-buffer submissions
|
||||
for (auto&& pass : enqueue_passes)
|
||||
{
|
||||
// enqueue simultaneous command-buffers with profiling command queue
|
||||
error = RunSimultaneous();
|
||||
test_error(error, "RunSimultaneous failed");
|
||||
error = EnqueuePass(pass);
|
||||
test_error(error, "EnqueueSerializedPass failed");
|
||||
}
|
||||
else
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
for (auto&& pass : enqueue_passes)
|
||||
{
|
||||
// enqueue single command-buffer with profiling command queue
|
||||
error = RunSingle();
|
||||
test_error(error, "RunSingle failed");
|
||||
error = VerifyResult(pass.query_event);
|
||||
test_error(error, "VerifyResult failed");
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RecordCommandBuffer()
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr, nullptr);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
@@ -196,41 +199,7 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RunSingle()
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
std::vector<cl_int> output_data(num_elements);
|
||||
|
||||
error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
|
||||
data_size(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
clEventWrapper query_event;
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, &query_event);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
|
||||
output_data.data(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
error = VerifyResult(query_event);
|
||||
test_error(error, "VerifyResult failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
struct SimulPassData
|
||||
{
|
||||
cl_int offset;
|
||||
std::vector<cl_int> output_buffer;
|
||||
clEventWrapper query_event;
|
||||
};
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
|
||||
cl_int EnqueuePass(EnqueuePassData& pd)
|
||||
{
|
||||
cl_int error = clEnqueueFillBuffer(
|
||||
queue, out_mem, &pattern, sizeof(cl_int),
|
||||
@@ -241,59 +210,13 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
|
||||
0, sizeof(cl_int), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
if (!wait_event)
|
||||
{
|
||||
wait_event = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
}
|
||||
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
|
||||
&wait_event, &pd.query_event);
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, &pd.query_event);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(
|
||||
queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int), data_size(),
|
||||
pd.output_buffer.data(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RunSimultaneous()
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_int offset = static_cast<cl_int>(num_elements);
|
||||
|
||||
std::vector<SimulPassData> simul_passes = {
|
||||
{ 0, std::vector<cl_int>(num_elements) },
|
||||
{ offset, std::vector<cl_int>(num_elements) }
|
||||
};
|
||||
|
||||
for (auto&& pass : simul_passes)
|
||||
{
|
||||
error = EnqueueSimultaneousPass(pass);
|
||||
test_error(error, "EnqueueSimultaneousPass failed");
|
||||
}
|
||||
|
||||
error = clSetUserEventStatus(wait_event, CL_COMPLETE);
|
||||
test_error(error, "clSetUserEventStatus failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
for (auto&& pass : simul_passes)
|
||||
{
|
||||
error = VerifyResult(pass.query_event);
|
||||
test_error(error, "VerifyResult failed");
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
clEventWrapper wait_event;
|
||||
|
||||
const cl_int pattern = 0xA;
|
||||
};
|
||||
|
||||
@@ -356,19 +279,13 @@ struct CommandBufferSubstituteQueueProfiling : public BasicCommandBufferTest
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
REGISTER_TEST(basic_profiling)
|
||||
REGISTER_TEST(profiling)
|
||||
{
|
||||
return MakeAndRunTest<CommandBufferProfiling<false>>(device, context, queue,
|
||||
num_elements);
|
||||
return MakeAndRunTest<CommandBufferProfiling>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(simultaneous_profiling)
|
||||
{
|
||||
return MakeAndRunTest<CommandBufferProfiling<true>>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(substitute_queue_profiling)
|
||||
REGISTER_TEST(profiling_substitute_queue)
|
||||
{
|
||||
return MakeAndRunTest<CommandBufferSubstituteQueueProfiling>(
|
||||
device, context, queue, num_elements);
|
||||
|
||||
@@ -23,21 +23,16 @@ namespace {
|
||||
// Command-queue substitution tests which handles below cases:
|
||||
// -substitution on queue without properties
|
||||
// -substitution on queue with properties
|
||||
// -simultaneous use queue substitution
|
||||
|
||||
template <bool prop_use, bool simul_use>
|
||||
template <bool prop_use>
|
||||
struct SubstituteQueueTest : public BasicCommandBufferTest
|
||||
{
|
||||
SubstituteQueueTest(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: BasicCommandBufferTest(device, context, queue),
|
||||
properties_use_requested(prop_use), user_event(nullptr)
|
||||
{
|
||||
simultaneous_use_requested = simul_use;
|
||||
if (simul_use) buffer_size_multiplier = 2;
|
||||
}
|
||||
properties_use_requested(prop_use)
|
||||
{}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
bool Skip() override
|
||||
{
|
||||
if (properties_use_requested)
|
||||
@@ -57,11 +52,9 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
|
||||
return true;
|
||||
}
|
||||
|
||||
return BasicCommandBufferTest::Skip()
|
||||
|| (simultaneous_use_requested && !simultaneous_use_support);
|
||||
return BasicCommandBufferTest::Skip();
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUp(int elements) override
|
||||
{
|
||||
// By default command queue is created without properties,
|
||||
@@ -81,7 +74,6 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
|
||||
return BasicCommandBufferTest::SetUp(elements);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int Run() override
|
||||
{
|
||||
// record command buffer with primary queue
|
||||
@@ -106,23 +98,14 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
|
||||
test_error(error, "clCreateCommandQueue failed");
|
||||
}
|
||||
|
||||
if (simultaneous_use_support)
|
||||
{
|
||||
// enque simultaneous command-buffers with substitute queue
|
||||
error = RunSimultaneous(new_queue);
|
||||
test_error(error, "RunSimultaneous failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
// enque single command-buffer with substitute queue
|
||||
error = RunSingle(new_queue);
|
||||
test_error(error, "RunSingle failed");
|
||||
}
|
||||
|
||||
// enqueue single command-buffer with substitute queue
|
||||
error = RunSingle(new_queue);
|
||||
test_error(error, "RunSingle failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RecordCommandBuffer()
|
||||
{
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
@@ -135,14 +118,13 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RunSingle(const cl_command_queue& q)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
std::vector<cl_int> output_data(num_elements);
|
||||
|
||||
error = clEnqueueFillBuffer(q, in_mem, &pattern_pri, sizeof(cl_int), 0,
|
||||
data_size(), 0, nullptr, nullptr);
|
||||
cl_int error =
|
||||
clEnqueueFillBuffer(q, in_mem, &pattern_pri, sizeof(cl_int), 0,
|
||||
data_size(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
cl_command_queue queues[] = { q };
|
||||
@@ -165,90 +147,8 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
struct SimulPassData
|
||||
{
|
||||
cl_int pattern;
|
||||
cl_int offset;
|
||||
cl_command_queue queue;
|
||||
std::vector<cl_int> output_buffer;
|
||||
};
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
|
||||
{
|
||||
cl_int error = clEnqueueFillBuffer(
|
||||
pd.queue, in_mem, &pd.pattern, sizeof(cl_int),
|
||||
pd.offset * sizeof(cl_int), data_size(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
error =
|
||||
clEnqueueFillBuffer(pd.queue, off_mem, &pd.offset, sizeof(cl_int),
|
||||
0, sizeof(cl_int), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
if (!user_event)
|
||||
{
|
||||
user_event = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
}
|
||||
|
||||
cl_command_queue queues[] = { pd.queue };
|
||||
error = clEnqueueCommandBufferKHR(1, queues, command_buffer, 1,
|
||||
&user_event, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(
|
||||
pd.queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int),
|
||||
data_size(), pd.output_buffer.data(), 0, nullptr, nullptr);
|
||||
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RunSimultaneous(const cl_command_queue& q)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_int offset = static_cast<cl_int>(num_elements);
|
||||
|
||||
std::vector<SimulPassData> simul_passes = {
|
||||
{ pattern_pri, 0, q, std::vector<cl_int>(num_elements) },
|
||||
{ pattern_sec, offset, q, std::vector<cl_int>(num_elements) }
|
||||
};
|
||||
|
||||
for (auto&& pass : simul_passes)
|
||||
{
|
||||
error = EnqueueSimultaneousPass(pass);
|
||||
test_error(error, "EnqueuePass failed");
|
||||
}
|
||||
|
||||
error = clSetUserEventStatus(user_event, CL_COMPLETE);
|
||||
test_error(error, "clSetUserEventStatus failed");
|
||||
|
||||
for (auto&& pass : simul_passes)
|
||||
{
|
||||
error = clFinish(pass.queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
auto& res_data = pass.output_buffer;
|
||||
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(pass.pattern, res_data[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
const cl_int pattern_pri = 0xB;
|
||||
const cl_int pattern_sec = 0xC;
|
||||
|
||||
bool properties_use_requested;
|
||||
clEventWrapper user_event;
|
||||
};
|
||||
|
||||
// Command-queue substitution tests which handles below cases:
|
||||
@@ -397,20 +297,14 @@ struct QueueOrderTest : public BasicCommandBufferTest
|
||||
|
||||
REGISTER_TEST(queue_substitution)
|
||||
{
|
||||
return MakeAndRunTest<SubstituteQueueTest<false, false>>(
|
||||
device, context, queue, num_elements);
|
||||
return MakeAndRunTest<SubstituteQueueTest<false>>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(properties_queue_substitution)
|
||||
REGISTER_TEST(queue_substitution_properties)
|
||||
{
|
||||
return MakeAndRunTest<SubstituteQueueTest<true, false>>(
|
||||
device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(simultaneous_queue_substitution)
|
||||
{
|
||||
return MakeAndRunTest<SubstituteQueueTest<false, true>>(
|
||||
device, context, queue, num_elements);
|
||||
return MakeAndRunTest<SubstituteQueueTest<true>>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(queue_substitute_in_order)
|
||||
|
||||
@@ -22,25 +22,22 @@ namespace {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// clSetKernelArg tests for cl_khr_command_buffer which handles below cases:
|
||||
// -test interactions of clSetKernelArg with command-buffers
|
||||
// -test interactions of clSetKernelArg on a command-buffer pending execution
|
||||
// -test interactions of clSetKernelArg after command-buffer finalize but
|
||||
// before enqueue
|
||||
// -test interactions of clSetKernelArg between command-buffer enqueue
|
||||
|
||||
template <bool simul_use>
|
||||
template <bool enqueue_test>
|
||||
struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
{
|
||||
CommandBufferSetKernelArg(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: BasicCommandBufferTest(device, context, queue), trigger_event(nullptr)
|
||||
: BasicCommandBufferTest(device, context, queue)
|
||||
{
|
||||
simultaneous_use_requested = simul_use;
|
||||
if (simul_use) buffer_size_multiplier = 2;
|
||||
if (enqueue_test) buffer_size_multiplier = 2;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUpKernel() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
const char* kernel_str =
|
||||
R"(
|
||||
__kernel void copy(int in, __global int* out, __global int* offset)
|
||||
@@ -50,8 +47,8 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
out[ind] = in;
|
||||
})";
|
||||
|
||||
error = create_single_kernel_helper_create_program(context, &program, 1,
|
||||
&kernel_str);
|
||||
cl_int error = create_single_kernel_helper_create_program(
|
||||
context, &program, 1, &kernel_str);
|
||||
test_error(error, "Failed to create program with source");
|
||||
|
||||
error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
|
||||
@@ -63,7 +60,6 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int SetUpKernelArgs() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
@@ -99,15 +95,14 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
if (simultaneous_use_requested)
|
||||
if (enqueue_test)
|
||||
{
|
||||
// enqueue simultaneous command-buffers with clSetKernelArg calls
|
||||
error = RunSimultaneous();
|
||||
test_error(error, "RunSimultaneous failed");
|
||||
// enqueue command-buffers with clSetKernelArg calls in between
|
||||
error = RunMultipleEnqueue();
|
||||
test_error(error, "RunMultipleEnqueue failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -119,12 +114,9 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RecordCommandBuffer()
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr, nullptr);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
@@ -148,14 +140,12 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RunSingle()
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
std::vector<cl_int> output_data(num_elements);
|
||||
|
||||
// record command buffer
|
||||
error = RecordCommandBuffer();
|
||||
cl_int error = RecordCommandBuffer();
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
|
||||
const cl_int pattern_base = 0;
|
||||
@@ -187,20 +177,16 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
struct SimulPassData
|
||||
struct EnqueuePassData
|
||||
{
|
||||
cl_int pattern;
|
||||
cl_int offset;
|
||||
std::vector<cl_int> output_buffer;
|
||||
};
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RecordSimultaneousCommandBuffer() const
|
||||
cl_int RecordEnqueueCommandBuffer() const
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr, nullptr);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
@@ -210,8 +196,7 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
|
||||
cl_int EnqueuePass(EnqueuePassData& pd)
|
||||
{
|
||||
cl_int error = clEnqueueFillBuffer(
|
||||
queue, out_mem, &pd.pattern, sizeof(cl_int),
|
||||
@@ -222,14 +207,8 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
0, sizeof(cl_int), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
if (!trigger_event)
|
||||
{
|
||||
trigger_event = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
}
|
||||
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
|
||||
&trigger_event, nullptr);
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clEnqueueReadBuffer(
|
||||
@@ -240,49 +219,39 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
cl_int RunSimultaneous()
|
||||
cl_int RunMultipleEnqueue()
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
// record command buffer with primary queue
|
||||
error = RecordSimultaneousCommandBuffer();
|
||||
test_error(error, "RecordSimultaneousCommandBuffer failed");
|
||||
cl_int error = RecordEnqueueCommandBuffer();
|
||||
test_error(error, "RecordEnqueeuCommandBuffer failed");
|
||||
|
||||
std::vector<SimulPassData> simul_passes = {
|
||||
{ 0, 0, std::vector<cl_int>(num_elements) }
|
||||
cl_int offset = static_cast<cl_int>(num_elements);
|
||||
std::vector<EnqueuePassData> enqueue_passes = {
|
||||
{ 0, 0, std::vector<cl_int>(num_elements) },
|
||||
{ 1, offset, std::vector<cl_int>(num_elements) }
|
||||
};
|
||||
|
||||
error = EnqueueSimultaneousPass(simul_passes.front());
|
||||
test_error(error, "EnqueueSimultaneousPass 1 failed");
|
||||
|
||||
// changing kernel args at this point should have no effect,
|
||||
// test will verify if clSetKernelArg didn't affect command-buffer
|
||||
cl_int in_arg = pattern_sec;
|
||||
error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelArg(kernel, 1, sizeof(out_mem_k2), &out_mem_k2);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
if (simultaneous_use_support)
|
||||
for (auto&& pass : enqueue_passes)
|
||||
{
|
||||
cl_int offset = static_cast<cl_int>(num_elements);
|
||||
simul_passes.push_back(
|
||||
{ 1, offset, std::vector<cl_int>(num_elements) });
|
||||
// changing kernel args at this point should have no effect,
|
||||
// test will verify if clSetKernelArg didn't affect command-buffer
|
||||
cl_int in_arg = pattern_sec;
|
||||
error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = EnqueueSimultaneousPass(simul_passes.back());
|
||||
test_error(error, "EnqueueSimultaneousPass 2 failed");
|
||||
error = clSetKernelArg(kernel, 1, sizeof(out_mem_k2), &out_mem_k2);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
|
||||
error = EnqueuePass(pass);
|
||||
test_error(error, "EnqueuePass failed");
|
||||
}
|
||||
|
||||
error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
|
||||
test_error(error, "clSetUserEventStatus failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
// verify the result buffer
|
||||
for (auto&& pass : simul_passes)
|
||||
for (auto&& pass : enqueue_passes)
|
||||
{
|
||||
auto& res_data = pass.output_buffer;
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
@@ -294,9 +263,6 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
clEventWrapper trigger_event = nullptr;
|
||||
|
||||
const cl_int pattern_pri = 2;
|
||||
const cl_int pattern_sec = 3;
|
||||
|
||||
@@ -305,13 +271,13 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
REGISTER_TEST(basic_set_kernel_arg)
|
||||
REGISTER_TEST(set_kernel_arg_after_finalize)
|
||||
{
|
||||
return MakeAndRunTest<CommandBufferSetKernelArg<false>>(
|
||||
device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(pending_set_kernel_arg)
|
||||
REGISTER_TEST(set_kernel_arg_after_enqueue)
|
||||
{
|
||||
return MakeAndRunTest<CommandBufferSetKernelArg<true>>(device, context,
|
||||
queue, num_elements);
|
||||
|
||||
@@ -131,9 +131,10 @@ struct CreateCommandBufferRepeatedProperties : public BasicCommandBufferTest
|
||||
if (BasicCommandBufferTest::Skip()) return true;
|
||||
|
||||
bool skip = true;
|
||||
if (simultaneous_use_support)
|
||||
if (is_extension_available(
|
||||
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
|
||||
{
|
||||
rep_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
|
||||
rep_prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
|
||||
skip = false;
|
||||
}
|
||||
else if (is_extension_available(
|
||||
@@ -142,13 +143,6 @@ struct CreateCommandBufferRepeatedProperties : public BasicCommandBufferTest
|
||||
rep_prop = CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
|
||||
skip = false;
|
||||
}
|
||||
else if (is_extension_available(
|
||||
device,
|
||||
CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
|
||||
{
|
||||
rep_prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
|
||||
skip = false;
|
||||
}
|
||||
|
||||
return skip;
|
||||
}
|
||||
@@ -185,7 +179,9 @@ struct CreateCommandBufferNotSupportedProperties : public BasicCommandBufferTest
|
||||
if (BasicCommandBufferTest::Skip()) return true;
|
||||
|
||||
bool skip = true;
|
||||
if (!simultaneous_use_support)
|
||||
if (is_extension_available(
|
||||
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME)
|
||||
&& !simultaneous_use_support)
|
||||
{
|
||||
unsupported_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
|
||||
skip = false;
|
||||
|
||||
@@ -66,102 +66,6 @@ struct EnqueueCommandBufferNotFinalized : public BasicCommandBufferTest
|
||||
}
|
||||
};
|
||||
|
||||
// CL_INVALID_OPERATION if command_buffer was not created with the
|
||||
// CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR flag and is in the Pending state.
|
||||
struct EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState
|
||||
: public BasicCommandBufferTest
|
||||
{
|
||||
EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState(
|
||||
cl_device_id device, cl_context context, cl_command_queue queue)
|
||||
: BasicCommandBufferTest(device, context, queue), user_event(nullptr)
|
||||
{}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
|
||||
test_failure_error_ret(error, CL_INVALID_OPERATION,
|
||||
"clEnqueueCommandBufferKHR should return "
|
||||
"CL_INVALID_OPERATION",
|
||||
TEST_FAIL);
|
||||
|
||||
error = clSetUserEventStatus(user_event, CL_COMPLETE);
|
||||
test_error(error, "clSetUserEventStatus failed");
|
||||
clFinish(queue);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int SetUp(int elements) override
|
||||
{
|
||||
auto verify_state = [&](const cl_command_buffer_state_khr &expected) {
|
||||
cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0);
|
||||
|
||||
cl_int error = clGetCommandBufferInfoKHR(
|
||||
command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state),
|
||||
&state, nullptr);
|
||||
test_error_ret(error, "clGetCommandBufferInfoKHR failed",
|
||||
TEST_FAIL);
|
||||
|
||||
test_assert_error(
|
||||
state == expected,
|
||||
"Unexpected result of CL_COMMAND_BUFFER_STATE_KHR query!");
|
||||
|
||||
return TEST_PASS;
|
||||
};
|
||||
|
||||
cl_int error = BasicCommandBufferTest::SetUp(elements);
|
||||
test_error(error, "BasicCommandBufferTest::SetUp failed");
|
||||
|
||||
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
|
||||
test_error(error, "clCreateCommandBufferKHR failed");
|
||||
|
||||
error = RecordCommandBuffer();
|
||||
test_error(error, "RecordCommandBuffer failed");
|
||||
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
|
||||
test_error(error, "State is not Executable");
|
||||
|
||||
error = EnqueueCommandBuffer();
|
||||
test_error(error, "EnqueueCommandBuffer failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int RecordCommandBuffer()
|
||||
{
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr, nullptr);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int EnqueueCommandBuffer()
|
||||
{
|
||||
cl_int pattern = 0xE;
|
||||
|
||||
cl_int error =
|
||||
clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
|
||||
data_size(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
user_event = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
|
||||
&user_event, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
clEventWrapper user_event;
|
||||
};
|
||||
|
||||
// CL_INVALID_VALUE if queues is NULL and num_queues is > 0, or queues is not
|
||||
// NULL and num_queues is 0.
|
||||
struct EnqueueCommandBufferNullQueuesNumQueues : public BasicCommandBufferTest
|
||||
@@ -623,14 +527,6 @@ REGISTER_TEST(negative_enqueue_command_buffer_not_finalized)
|
||||
device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(
|
||||
negative_enqueue_command_buffer_without_simultaneous_no_pending_state)
|
||||
{
|
||||
return MakeAndRunTest<
|
||||
EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState>(
|
||||
device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
REGISTER_TEST(negative_enqueue_command_buffer_null_queues_num_queues)
|
||||
{
|
||||
return MakeAndRunTest<EnqueueCommandBufferNullQueuesNumQueues>(
|
||||
|
||||
@@ -44,20 +44,9 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
|
||||
FinalizeCommandBufferNotRecordingState(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue)
|
||||
: BasicCommandBufferTest(device, context, queue), user_event(nullptr)
|
||||
: BasicCommandBufferTest(device, context, queue)
|
||||
{}
|
||||
|
||||
cl_int SetUp(int elements) override
|
||||
{
|
||||
cl_int error = BasicCommandBufferTest::SetUp(elements);
|
||||
test_error(error, "BasicCommandBufferTest::SetUp failed");
|
||||
|
||||
user_event = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
auto verify_state = [&](const cl_command_buffer_state_khr &expected) {
|
||||
@@ -87,18 +76,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
|
||||
"CL_INVALID_OPERATION",
|
||||
TEST_FAIL);
|
||||
|
||||
error = EnqueueCommandBuffer();
|
||||
test_error(error, "EnqueueCommandBuffer failed");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||
test_failure_error_ret(error, CL_INVALID_OPERATION,
|
||||
"clFinalizeCommandBufferKHR should return "
|
||||
"CL_INVALID_OPERATION",
|
||||
TEST_FAIL);
|
||||
|
||||
clSetUserEventStatus(user_event, CL_COMPLETE);
|
||||
clFinish(queue);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -114,22 +91,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int EnqueueCommandBuffer()
|
||||
{
|
||||
cl_int pattern = 0xE;
|
||||
cl_int error =
|
||||
clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
|
||||
data_size(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
|
||||
&user_event, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
clEventWrapper user_event;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -398,16 +398,17 @@ struct CommandNDRangeKernelWithKernelEnqueueCall : public BasicCommandBufferTest
|
||||
|
||||
const char* kernel_str =
|
||||
R"(
|
||||
__kernel void enqueue_call_func() {
|
||||
}
|
||||
__kernel void enqueue_call_func(__global int* out_mem) {
|
||||
out_mem[get_global_id(0)] = 0x1234;
|
||||
}
|
||||
|
||||
__kernel void enqueue_call_kernel() {
|
||||
queue_t def_q = get_default_queue();
|
||||
ndrange_t ndrange = ndrange_1D(1);
|
||||
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
|
||||
^{enqueue_call_func();});
|
||||
}
|
||||
)";
|
||||
__kernel void enqueue_call_kernel(__global int* out_mem) {
|
||||
queue_t def_q = get_default_queue();
|
||||
ndrange_t ndrange = ndrange_1D(1);
|
||||
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
|
||||
^{enqueue_call_func(out_mem);});
|
||||
}
|
||||
)";
|
||||
std::string build_options = std::string(" ") + cl_std;
|
||||
|
||||
error = create_single_kernel_helper(context, &program, &kernel, 1,
|
||||
@@ -443,7 +444,17 @@ enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int error = clCommandNDRangeKernelKHR(
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
clMemWrapper out_mem =
|
||||
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
num_elements * sizeof(cl_int), nullptr, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &out_mem);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr, nullptr);
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
//
|
||||
|
||||
#include "debug_ahb.h"
|
||||
#include "harness/errorHelpers.h"
|
||||
|
||||
constexpr AHardwareBuffer_UsageFlags flag_list[] = {
|
||||
AHARDWAREBUFFER_USAGE_CPU_READ_RARELY,
|
||||
@@ -191,3 +192,27 @@ std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format)
|
||||
}
|
||||
return result;
|
||||
}
|
||||
AHardwareBuffer *create_AHB(AHardwareBuffer_Desc *desc)
|
||||
{
|
||||
AHardwareBuffer *buffer_ptr = nullptr;
|
||||
int err = AHardwareBuffer_allocate(desc, &buffer_ptr);
|
||||
if (err != 0)
|
||||
{
|
||||
throw std::runtime_error("AHardwareBuffer_allocate failed with code: "
|
||||
+ std::to_string(err) + "\n");
|
||||
}
|
||||
return buffer_ptr;
|
||||
}
|
||||
|
||||
void log_unsupported_ahb_format(AHardwareBuffer_Desc aHardwareBufferDesc)
|
||||
{
|
||||
std::string usage_string = ahardwareBufferDecodeUsageFlagsToString(
|
||||
static_cast<AHardwareBuffer_UsageFlags>(aHardwareBufferDesc.usage));
|
||||
log_info("Unsupported format %s:\n Usage flags %s\n Size (%u, %u, "
|
||||
"layers = %u)\n",
|
||||
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
|
||||
aHardwareBufferDesc.format))
|
||||
.c_str(),
|
||||
usage_string.c_str(), aHardwareBufferDesc.width,
|
||||
aHardwareBufferDesc.height, aHardwareBufferDesc.layers);
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <CL/cl.h>
|
||||
|
||||
#define CHECK_AHARDWARE_BUFFER_SUPPORT(ahardwareBuffer_Desc, format) \
|
||||
if (!AHardwareBuffer_isSupported(&ahardwareBuffer_Desc)) \
|
||||
@@ -40,3 +41,90 @@ std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format);
|
||||
std::string ahardwareBufferUsageFlagToString(AHardwareBuffer_UsageFlags flag);
|
||||
std::string
|
||||
ahardwareBufferDecodeUsageFlagsToString(AHardwareBuffer_UsageFlags flags);
|
||||
|
||||
AHardwareBuffer* create_AHB(AHardwareBuffer_Desc* desc);
|
||||
void log_unsupported_ahb_format(AHardwareBuffer_Desc desc);
|
||||
|
||||
struct AHardwareBufferWrapper
|
||||
{
|
||||
AHardwareBuffer* m_ahb;
|
||||
|
||||
AHardwareBufferWrapper(): m_ahb(nullptr) {}
|
||||
|
||||
AHardwareBufferWrapper(AHardwareBuffer* ahb) { m_ahb = ahb; }
|
||||
|
||||
AHardwareBufferWrapper(AHardwareBuffer_Desc* desc)
|
||||
{
|
||||
m_ahb = create_AHB(desc);
|
||||
}
|
||||
|
||||
AHardwareBufferWrapper& operator=(AHardwareBuffer* rhs)
|
||||
{
|
||||
release();
|
||||
m_ahb = rhs;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
~AHardwareBufferWrapper() { release(); }
|
||||
|
||||
// Copy constructor
|
||||
AHardwareBufferWrapper(AHardwareBufferWrapper const& ahbw)
|
||||
: m_ahb(ahbw.m_ahb)
|
||||
{
|
||||
retain();
|
||||
}
|
||||
|
||||
// Copy assignment operator
|
||||
AHardwareBufferWrapper& operator=(AHardwareBufferWrapper const& rhs)
|
||||
{
|
||||
release();
|
||||
|
||||
m_ahb = rhs.m_ahb;
|
||||
retain();
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Move constructor
|
||||
AHardwareBufferWrapper(AHardwareBufferWrapper&& ahbw)
|
||||
{
|
||||
m_ahb = ahbw.m_ahb;
|
||||
ahbw.m_ahb = nullptr;
|
||||
}
|
||||
|
||||
// Move assignment operator
|
||||
AHardwareBufferWrapper& operator=(AHardwareBufferWrapper&& rhs)
|
||||
{
|
||||
if (this != &rhs)
|
||||
{
|
||||
release(); // Giving up current reference
|
||||
m_ahb = rhs.m_ahb;
|
||||
rhs.m_ahb = nullptr;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void retain()
|
||||
{
|
||||
if (nullptr != m_ahb)
|
||||
{
|
||||
AHardwareBuffer_acquire(m_ahb);
|
||||
}
|
||||
}
|
||||
|
||||
void release()
|
||||
{
|
||||
if (nullptr != m_ahb)
|
||||
{
|
||||
AHardwareBuffer_release(m_ahb);
|
||||
}
|
||||
}
|
||||
|
||||
// Usage operators
|
||||
operator AHardwareBuffer*() { return m_ahb; }
|
||||
cl_mem_properties get_props()
|
||||
{
|
||||
return reinterpret_cast<cl_mem_properties>(m_ahb);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -97,7 +97,7 @@ static const char *diff_images_kernel_source = {
|
||||
};
|
||||
|
||||
// Checks that the inferred image format is correct
|
||||
REGISTER_TEST(test_images)
|
||||
REGISTER_TEST(images)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
@@ -134,19 +134,15 @@ REGISTER_TEST(test_images)
|
||||
|
||||
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
|
||||
|
||||
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
|
||||
&aHardwareBuffer);
|
||||
if (ahb_result != 0)
|
||||
{
|
||||
log_error("AHardwareBuffer_allocate failed with code %d\n",
|
||||
ahb_result);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
|
||||
log_info(
|
||||
"Testing %s\n",
|
||||
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
|
||||
.c_str());
|
||||
|
||||
const cl_mem_properties props[] = {
|
||||
cl_mem_properties props[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||
aHardwareBuffer.get_props(), 0
|
||||
};
|
||||
|
||||
cl_mem image = clCreateImageWithProperties(
|
||||
@@ -181,8 +177,6 @@ REGISTER_TEST(test_images)
|
||||
|
||||
test_error(clReleaseMemObject(image),
|
||||
"Failed to release image");
|
||||
AHardwareBuffer_release(aHardwareBuffer);
|
||||
aHardwareBuffer = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -190,7 +184,7 @@ REGISTER_TEST(test_images)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(test_images_read)
|
||||
REGISTER_TEST(images_read)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
RandomSeed seed(gRandomSeed);
|
||||
@@ -238,15 +232,11 @@ REGISTER_TEST(test_images_read)
|
||||
|
||||
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
|
||||
|
||||
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
|
||||
&aHardwareBuffer);
|
||||
if (ahb_result != 0)
|
||||
{
|
||||
log_error("AHardwareBuffer_allocate failed with code %d\n",
|
||||
ahb_result);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
|
||||
log_info(
|
||||
"Testing %s\n",
|
||||
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
|
||||
.c_str());
|
||||
|
||||
// Determine AHB memory layout
|
||||
AHardwareBuffer_Desc hardware_buffer_desc = {};
|
||||
@@ -279,7 +269,7 @@ REGISTER_TEST(test_images_read)
|
||||
generate_random_image_data(&imageInfo, srcData, seed);
|
||||
|
||||
void *hardware_buffer_data = nullptr;
|
||||
ahb_result = AHardwareBuffer_lock(
|
||||
int ahb_result = AHardwareBuffer_lock(
|
||||
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
|
||||
nullptr, &hardware_buffer_data);
|
||||
if (ahb_result != 0)
|
||||
@@ -301,7 +291,7 @@ REGISTER_TEST(test_images_read)
|
||||
|
||||
cl_mem_properties props[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||
aHardwareBuffer.get_props(), 0
|
||||
};
|
||||
|
||||
clMemWrapper imported_image = clCreateImageWithProperties(
|
||||
@@ -394,7 +384,7 @@ REGISTER_TEST(test_images_read)
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
err = clEnqueueReleaseExternalMemObjectsKHR(
|
||||
queue, 1, &opencl_image, 0, nullptr, nullptr);
|
||||
queue, 1, &imported_image, 0, nullptr, nullptr);
|
||||
test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed");
|
||||
|
||||
// Read buffer and verify
|
||||
@@ -482,9 +472,6 @@ REGISTER_TEST(test_images_read)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
AHardwareBuffer_release(aHardwareBuffer);
|
||||
aHardwareBuffer = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -492,7 +479,7 @@ REGISTER_TEST(test_images_read)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(test_enqueue_read_image)
|
||||
REGISTER_TEST(enqueue_read_image)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
RandomSeed seed(gRandomSeed);
|
||||
@@ -540,15 +527,12 @@ REGISTER_TEST(test_enqueue_read_image)
|
||||
|
||||
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
|
||||
|
||||
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
|
||||
&aHardwareBuffer);
|
||||
if (ahb_result != 0)
|
||||
{
|
||||
log_error("AHardwareBuffer_allocate failed with code %d\n",
|
||||
ahb_result);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
|
||||
|
||||
log_info(
|
||||
"Testing %s\n",
|
||||
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
|
||||
.c_str());
|
||||
|
||||
// Determine AHB memory layout
|
||||
AHardwareBuffer_Desc hardware_buffer_desc = {};
|
||||
@@ -581,7 +565,7 @@ REGISTER_TEST(test_enqueue_read_image)
|
||||
generate_random_image_data(&imageInfo, srcData, seed);
|
||||
|
||||
void *hardware_buffer_data = nullptr;
|
||||
ahb_result = AHardwareBuffer_lock(
|
||||
int ahb_result = AHardwareBuffer_lock(
|
||||
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
|
||||
nullptr, &hardware_buffer_data);
|
||||
if (ahb_result != 0)
|
||||
@@ -601,9 +585,9 @@ REGISTER_TEST(test_enqueue_read_image)
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
const cl_mem_properties props[] = {
|
||||
cl_mem_properties props[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||
aHardwareBuffer.get_props(), 0
|
||||
};
|
||||
|
||||
clMemWrapper imported_image = clCreateImageWithProperties(
|
||||
@@ -662,9 +646,6 @@ REGISTER_TEST(test_enqueue_read_image)
|
||||
out_image_ptr += imageInfo.rowPitch;
|
||||
}
|
||||
|
||||
AHardwareBuffer_release(aHardwareBuffer);
|
||||
aHardwareBuffer = nullptr;
|
||||
|
||||
if (total_matched == 0)
|
||||
{
|
||||
test_fail("Zero bytes matched");
|
||||
@@ -676,7 +657,7 @@ REGISTER_TEST(test_enqueue_read_image)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(test_enqueue_copy_image)
|
||||
REGISTER_TEST(enqueue_copy_image)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
RandomSeed seed(gRandomSeed);
|
||||
@@ -724,15 +705,12 @@ REGISTER_TEST(test_enqueue_copy_image)
|
||||
|
||||
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
|
||||
|
||||
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
|
||||
&aHardwareBuffer);
|
||||
if (ahb_result != 0)
|
||||
{
|
||||
log_error("AHardwareBuffer_allocate failed with code %d\n",
|
||||
ahb_result);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
|
||||
|
||||
log_info(
|
||||
"Testing %s\n",
|
||||
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
|
||||
.c_str());
|
||||
|
||||
// Determine AHB memory layout
|
||||
AHardwareBuffer_Desc hardware_buffer_desc = {};
|
||||
@@ -765,7 +743,7 @@ REGISTER_TEST(test_enqueue_copy_image)
|
||||
generate_random_image_data(&imageInfo, srcData, seed);
|
||||
|
||||
void *hardware_buffer_data = nullptr;
|
||||
ahb_result = AHardwareBuffer_lock(
|
||||
int ahb_result = AHardwareBuffer_lock(
|
||||
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
|
||||
nullptr, &hardware_buffer_data);
|
||||
if (ahb_result != 0)
|
||||
@@ -787,7 +765,7 @@ REGISTER_TEST(test_enqueue_copy_image)
|
||||
|
||||
cl_mem_properties props[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||
aHardwareBuffer.get_props(), 0
|
||||
};
|
||||
|
||||
clMemWrapper imported_image = clCreateImageWithProperties(
|
||||
@@ -975,9 +953,6 @@ REGISTER_TEST(test_enqueue_copy_image)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
AHardwareBuffer_release(aHardwareBuffer);
|
||||
aHardwareBuffer = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -985,7 +960,7 @@ REGISTER_TEST(test_enqueue_copy_image)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(test_enqueue_copy_image_to_buffer)
|
||||
REGISTER_TEST(enqueue_copy_image_to_buffer)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
RandomSeed seed(gRandomSeed);
|
||||
@@ -1033,15 +1008,12 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
|
||||
|
||||
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
|
||||
|
||||
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
|
||||
&aHardwareBuffer);
|
||||
if (ahb_result != 0)
|
||||
{
|
||||
log_error("AHardwareBuffer_allocate failed with code %d\n",
|
||||
ahb_result);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
|
||||
|
||||
log_info(
|
||||
"Testing %s\n",
|
||||
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
|
||||
.c_str());
|
||||
|
||||
// Determine AHB memory layout
|
||||
AHardwareBuffer_Desc hardware_buffer_desc = {};
|
||||
@@ -1074,7 +1046,7 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
|
||||
generate_random_image_data(&imageInfo, srcData, seed);
|
||||
|
||||
void *hardware_buffer_data = nullptr;
|
||||
ahb_result = AHardwareBuffer_lock(
|
||||
int ahb_result = AHardwareBuffer_lock(
|
||||
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
|
||||
nullptr, &hardware_buffer_data);
|
||||
if (ahb_result != 0)
|
||||
@@ -1096,7 +1068,7 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
|
||||
|
||||
cl_mem_properties props[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||
aHardwareBuffer.get_props(), 0
|
||||
};
|
||||
|
||||
clMemWrapper imported_image = clCreateImageWithProperties(
|
||||
@@ -1165,9 +1137,6 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
|
||||
out_buffer_ptr += scanlineSize;
|
||||
}
|
||||
|
||||
AHardwareBuffer_release(aHardwareBuffer);
|
||||
aHardwareBuffer = nullptr;
|
||||
|
||||
if (total_matched == 0)
|
||||
{
|
||||
test_fail("Zero bytes matched");
|
||||
@@ -1179,7 +1148,7 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(test_enqueue_copy_buffer_to_image)
|
||||
REGISTER_TEST(enqueue_copy_buffer_to_image)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
RandomSeed seed(gRandomSeed);
|
||||
@@ -1227,15 +1196,12 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
|
||||
|
||||
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
|
||||
|
||||
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
|
||||
&aHardwareBuffer);
|
||||
if (ahb_result != 0)
|
||||
{
|
||||
log_error("AHardwareBuffer_allocate failed with code %d\n",
|
||||
ahb_result);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
|
||||
|
||||
log_info(
|
||||
"Testing %s\n",
|
||||
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
|
||||
.c_str());
|
||||
|
||||
// Determine AHB memory layout
|
||||
AHardwareBuffer_Desc hardware_buffer_desc = {};
|
||||
@@ -1275,7 +1241,7 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
|
||||
|
||||
cl_mem_properties props[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||
aHardwareBuffer.get_props(), 0
|
||||
};
|
||||
|
||||
clMemWrapper imported_image = clCreateImageWithProperties(
|
||||
@@ -1307,7 +1273,7 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
|
||||
&hardware_buffer_desc);
|
||||
|
||||
void *hardware_buffer_data = nullptr;
|
||||
ahb_result = AHardwareBuffer_lock(
|
||||
int ahb_result = AHardwareBuffer_lock(
|
||||
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1,
|
||||
nullptr, &hardware_buffer_data);
|
||||
if (ahb_result != 0)
|
||||
@@ -1366,9 +1332,6 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
AHardwareBuffer_release(aHardwareBuffer);
|
||||
aHardwareBuffer = nullptr;
|
||||
|
||||
if (total_matched == 0)
|
||||
{
|
||||
test_fail("Zero bytes matched");
|
||||
@@ -1380,7 +1343,7 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(test_enqueue_write_image)
|
||||
REGISTER_TEST(enqueue_write_image)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
RandomSeed seed(gRandomSeed);
|
||||
@@ -1428,15 +1391,12 @@ REGISTER_TEST(test_enqueue_write_image)
|
||||
|
||||
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
|
||||
|
||||
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
|
||||
&aHardwareBuffer);
|
||||
if (ahb_result != 0)
|
||||
{
|
||||
log_error("AHardwareBuffer_allocate failed with code %d\n",
|
||||
ahb_result);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
|
||||
|
||||
log_info(
|
||||
"Testing %s\n",
|
||||
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
|
||||
.c_str());
|
||||
|
||||
// Determine AHB memory layout
|
||||
AHardwareBuffer_Desc hardware_buffer_desc = {};
|
||||
@@ -1453,7 +1413,7 @@ REGISTER_TEST(test_enqueue_write_image)
|
||||
|
||||
cl_mem_properties props[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||
aHardwareBuffer.get_props(), 0
|
||||
};
|
||||
|
||||
clMemWrapper imported_image = clCreateImageWithProperties(
|
||||
@@ -1503,7 +1463,7 @@ REGISTER_TEST(test_enqueue_write_image)
|
||||
&hardware_buffer_desc);
|
||||
|
||||
void *hardware_buffer_data = nullptr;
|
||||
ahb_result = AHardwareBuffer_lock(
|
||||
int ahb_result = AHardwareBuffer_lock(
|
||||
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1,
|
||||
nullptr, &hardware_buffer_data);
|
||||
if (ahb_result != 0)
|
||||
@@ -1564,9 +1524,6 @@ REGISTER_TEST(test_enqueue_write_image)
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
AHardwareBuffer_release(aHardwareBuffer);
|
||||
aHardwareBuffer = nullptr;
|
||||
|
||||
if (total_matched == 0)
|
||||
{
|
||||
test_fail("Zero bytes matched");
|
||||
@@ -1578,7 +1535,7 @@ REGISTER_TEST(test_enqueue_write_image)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(test_enqueue_fill_image)
|
||||
REGISTER_TEST(enqueue_fill_image)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
RandomSeed seed(gRandomSeed);
|
||||
@@ -1626,15 +1583,12 @@ REGISTER_TEST(test_enqueue_fill_image)
|
||||
|
||||
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
|
||||
|
||||
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
|
||||
&aHardwareBuffer);
|
||||
if (ahb_result != 0)
|
||||
{
|
||||
log_error("AHardwareBuffer_allocate failed with code %d\n",
|
||||
ahb_result);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
|
||||
|
||||
log_info(
|
||||
"Testing %s\n",
|
||||
ahardwareBufferFormatToString(format.aHardwareBufferFormat)
|
||||
.c_str());
|
||||
|
||||
// Determine AHB memory layout
|
||||
AHardwareBuffer_Desc hardware_buffer_desc = {};
|
||||
@@ -1650,7 +1604,7 @@ REGISTER_TEST(test_enqueue_fill_image)
|
||||
|
||||
cl_mem_properties props[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||
aHardwareBuffer.get_props(), 0
|
||||
};
|
||||
|
||||
clMemWrapper imported_image = clCreateImageWithProperties(
|
||||
@@ -1739,7 +1693,7 @@ REGISTER_TEST(test_enqueue_fill_image)
|
||||
&hardware_buffer_desc);
|
||||
|
||||
void *hardware_buffer_data = nullptr;
|
||||
ahb_result = AHardwareBuffer_lock(
|
||||
int ahb_result = AHardwareBuffer_lock(
|
||||
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1,
|
||||
nullptr, &hardware_buffer_data);
|
||||
if (ahb_result != 0)
|
||||
@@ -1819,8 +1773,6 @@ REGISTER_TEST(test_enqueue_fill_image)
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
AHardwareBuffer_release(aHardwareBuffer);
|
||||
aHardwareBuffer = nullptr;
|
||||
free(verificationLine);
|
||||
|
||||
if (total_matched == 0)
|
||||
@@ -1834,7 +1786,7 @@ REGISTER_TEST(test_enqueue_fill_image)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(test_blob)
|
||||
REGISTER_TEST(blob)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
@@ -1883,19 +1835,17 @@ REGISTER_TEST(test_blob)
|
||||
continue;
|
||||
}
|
||||
|
||||
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||
int ahb_result =
|
||||
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
|
||||
if (ahb_result != 0)
|
||||
{
|
||||
log_error("AHardwareBuffer_allocate failed with code %d\n",
|
||||
ahb_result);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
|
||||
|
||||
log_info(
|
||||
"Testing %s\n",
|
||||
ahardwareBufferFormatToString(
|
||||
static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
|
||||
.c_str());
|
||||
|
||||
cl_mem_properties props[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||
aHardwareBuffer.get_props(), 0
|
||||
};
|
||||
|
||||
cl_mem buffer = clCreateBufferWithProperties(
|
||||
@@ -1903,8 +1853,6 @@ REGISTER_TEST(test_blob)
|
||||
test_error(err, "Failed to create CL buffer from AHardwareBuffer");
|
||||
|
||||
test_error(clReleaseMemObject(buffer), "Failed to release buffer");
|
||||
AHardwareBuffer_release(aHardwareBuffer);
|
||||
aHardwareBuffer = nullptr;
|
||||
}
|
||||
|
||||
return TEST_PASS;
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
#include <android/hardware_buffer.h>
|
||||
#include "debug_ahb.h"
|
||||
|
||||
REGISTER_TEST(test_buffer_format_negative)
|
||||
REGISTER_TEST(buffer_format_negative)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
@@ -98,7 +98,7 @@ REGISTER_TEST(test_buffer_format_negative)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(test_buffer_size_negative)
|
||||
REGISTER_TEST(buffer_size_negative)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
constexpr size_t buffer_size = 64;
|
||||
@@ -175,7 +175,7 @@ REGISTER_TEST(test_buffer_size_negative)
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(test_images_negative)
|
||||
REGISTER_TEST(images_negative)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
@@ -244,3 +244,155 @@ REGISTER_TEST(test_images_negative)
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
REGISTER_TEST(invalid_arguments)
|
||||
{
|
||||
cl_int err;
|
||||
constexpr cl_uint buffer_size = 4096;
|
||||
|
||||
if (!is_extension_available(
|
||||
device, "cl_khr_external_memory_android_hardware_buffer"))
|
||||
{
|
||||
log_info("cl_khr_external_memory_android_hardware_buffer is not "
|
||||
"supported on this platform. Skipping test.\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
|
||||
aHardwareBufferDesc.width = buffer_size;
|
||||
aHardwareBufferDesc.height = 1;
|
||||
aHardwareBufferDesc.layers = 1;
|
||||
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
|
||||
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
|
||||
| AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
|
||||
|
||||
if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
|
||||
{
|
||||
log_unsupported_ahb_format(aHardwareBufferDesc);
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
AHardwareBufferWrapper ahb_buffer(&aHardwareBufferDesc);
|
||||
|
||||
aHardwareBufferDesc.width = 64;
|
||||
aHardwareBufferDesc.height = 64;
|
||||
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
|
||||
|
||||
if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
|
||||
{
|
||||
log_unsupported_ahb_format(aHardwareBufferDesc);
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
AHardwareBufferWrapper ahb_image(&aHardwareBufferDesc);
|
||||
|
||||
const cl_mem_properties props_buffer[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
ahb_buffer.get_props(),
|
||||
0,
|
||||
};
|
||||
const cl_mem_properties props_image[] = {
|
||||
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||
ahb_image.get_props(),
|
||||
0,
|
||||
};
|
||||
|
||||
// stub values
|
||||
cl_image_format image_format = { 0 };
|
||||
cl_image_desc image_desc = { 0 };
|
||||
int host_data = 0;
|
||||
void *host_ptr = reinterpret_cast<void *>(&host_data);
|
||||
|
||||
log_info("Testing buffer error conditions\n");
|
||||
|
||||
// Buffer error conditions
|
||||
clMemWrapper mem =
|
||||
clCreateBufferWithProperties(context, props_buffer, CL_MEM_READ_WRITE,
|
||||
buffer_size + 1, nullptr, &err);
|
||||
if (CL_INVALID_BUFFER_SIZE != err)
|
||||
{
|
||||
log_error(
|
||||
"clCreateBufferWithProperties should return CL_INVALID_BUFFER_SIZE "
|
||||
"but returned %s: CL_INVALID_BUFFER_SIZE if size is non-zero and "
|
||||
"greater than the AHardwareBuffer when importing external memory "
|
||||
"using cl_khr_external_memory_android_hardware_buffer\n",
|
||||
IGetErrorString(err));
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
mem = clCreateBufferWithProperties(context, props_image, CL_MEM_READ_WRITE,
|
||||
0, nullptr, &err);
|
||||
if (CL_INVALID_OPERATION != err)
|
||||
{
|
||||
log_error(
|
||||
"clCreateBufferWithProperties should return CL_INVALID_OPERATION "
|
||||
"but returned %s: CL_INVALID_OPERATION if the AHardwareBuffer "
|
||||
"format is not AHARDWAREBUFFER_FORMAT_BLOB\n",
|
||||
IGetErrorString(err));
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
mem = clCreateBufferWithProperties(context, props_buffer, CL_MEM_READ_WRITE,
|
||||
0, host_ptr, &err);
|
||||
if (CL_INVALID_HOST_PTR != err)
|
||||
{
|
||||
log_error(
|
||||
"clCreateBufferWithProperties should return CL_INVALID_HOST_PTR "
|
||||
"but returned %s: CL_INVALID_HOST_PTR if host_ptr is not NULL\n",
|
||||
IGetErrorString(err));
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
log_info("Testing image error conditions\n");
|
||||
|
||||
// Image error conditions
|
||||
mem = clCreateImageWithProperties(context, props_image, CL_MEM_READ_WRITE,
|
||||
&image_format, nullptr, nullptr, &err);
|
||||
if (CL_INVALID_IMAGE_FORMAT_DESCRIPTOR != err)
|
||||
{
|
||||
log_error(
|
||||
"clCreateBufferWithProperties should return "
|
||||
"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR but returned %s: "
|
||||
"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if image_format is not NULL "
|
||||
"when using cl_khr_external_memory_android_hardware_buffer.\n",
|
||||
IGetErrorString(err));
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
mem = clCreateImageWithProperties(context, props_image, CL_MEM_READ_WRITE,
|
||||
nullptr, &image_desc, nullptr, &err);
|
||||
if (CL_INVALID_IMAGE_DESCRIPTOR != err)
|
||||
{
|
||||
log_error("clCreateBufferWithProperties should return "
|
||||
"CL_INVALID_IMAGE_DESCRIPTOR but returned %s: "
|
||||
"CL_INVALID_IMAGE_DESCRIPTOR if image_desc is not NULL when "
|
||||
"using cl_khr_external_memory_android_hardware_buffer.\n",
|
||||
IGetErrorString(err));
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
mem = clCreateImageWithProperties(context, props_buffer, CL_MEM_READ_WRITE,
|
||||
nullptr, nullptr, nullptr, &err);
|
||||
if (CL_IMAGE_FORMAT_NOT_SUPPORTED != err)
|
||||
{
|
||||
log_error("clCreateBufferWithProperties should return "
|
||||
"CL_IMAGE_FORMAT_NOT_SUPPORTED but returned %s: "
|
||||
"CL_IMAGE_FORMAT_NOT_SUPPORTED if AHardwareBuffer's format "
|
||||
"is not supported\n",
|
||||
IGetErrorString(err));
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
mem = clCreateImageWithProperties(context, props_image, CL_MEM_READ_WRITE,
|
||||
nullptr, nullptr, host_ptr, &err);
|
||||
if (CL_INVALID_HOST_PTR != err)
|
||||
{
|
||||
log_error(
|
||||
"clCreateBufferWithProperties should return CL_INVALID_HOST_PTR "
|
||||
"but returned %s: CL_INVALID_HOST_PTR if host_ptr is not NULL\n",
|
||||
IGetErrorString(err));
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
if (WIN32)
    include_directories(${CLConform_SOURCE_DIR}/test_common/harness
                        ${CLConform_INCLUDE_DIR})
    link_directories(${CL_LIB_DIR})

    list(APPEND CLConform_LIBRARIES directx_wrapper)

    set(MODULE_NAME CL_KHR_EXTERNAL_SEMAPHORE_DX_FENCE)

    set(${MODULE_NAME}_SOURCES
        main.cpp
        test_external_semaphore_dx_fence.cpp
        test_external_semaphore_dx_fence_negative_wait_signal.cpp
        test_external_semaphore_dx_fence_queries.cpp
        test_external_semaphore_dx_fence_export.cpp
    )

    # ${MODULE_NAME}_SOURCES expands to the *name* of the list variable
    # ("CL_KHR_EXTERNAL_SEMAPHORE_DX_FENCE_SOURCES"), so a second level of
    # expansion is required to pass its contents. Without it the LANGUAGE
    # property was set on that literal string instead of on the source files.
    set_source_files_properties(
        ${${MODULE_NAME}_SOURCES}
        PROPERTIES LANGUAGE CXX)

    include_directories(${CMAKE_CURRENT_SOURCE_DIR})
    include_directories("../../common/directx_wrapper")
    include(../../CMakeCommon.txt)
endif (WIN32)
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "harness/testHarness.h"

// Entry point for the cl_khr_external_semaphore_dx_fence test module.
// Hands control to the CTS harness, which runs every test that this
// module registered via REGISTER_TEST.
int main(int argc, const char *argv[])
{
    // NOTE(review): the trailing (false, 0) arguments mirror the other CTS
    // module main()s -- confirm against the runTestHarness declaration in
    // harness/testHarness.h if its signature changes.
    return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
                          test_registry::getInstance().definitions(), false, 0);
}
|
||||
@@ -0,0 +1,117 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/extensionHelpers.h"
|
||||
#include "harness/errorHelpers.h"
|
||||
#include "directx_wrapper.hpp"
|
||||
|
||||
class CLDXSemaphoreWrapper {
|
||||
public:
|
||||
CLDXSemaphoreWrapper(cl_device_id device, cl_context context,
|
||||
ID3D12Device* dx_device)
|
||||
: device(device), context(context), dx_device(dx_device){};
|
||||
|
||||
int createSemaphoreFromFence(ID3D12Fence* fence)
|
||||
{
|
||||
cl_int errcode = CL_SUCCESS;
|
||||
|
||||
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
|
||||
|
||||
const HRESULT hr = dx_device->CreateSharedHandle(
|
||||
fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
|
||||
test_error(FAILED(hr), "Failed to get shared handle from D3D12 fence");
|
||||
|
||||
cl_semaphore_properties_khr sem_props[] = {
|
||||
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
|
||||
static_cast<cl_semaphore_properties_khr>(
|
||||
CL_SEMAPHORE_TYPE_BINARY_KHR),
|
||||
static_cast<cl_semaphore_properties_khr>(
|
||||
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
|
||||
reinterpret_cast<cl_semaphore_properties_khr>(fence_handle), 0
|
||||
};
|
||||
semaphore =
|
||||
clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
|
||||
test_error(errcode, "Could not create semaphore");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
~CLDXSemaphoreWrapper()
|
||||
{
|
||||
releaseSemaphore();
|
||||
if (fence_handle)
|
||||
{
|
||||
CloseHandle(fence_handle);
|
||||
}
|
||||
};
|
||||
|
||||
const cl_semaphore_khr* operator&() const { return &semaphore; };
|
||||
cl_semaphore_khr operator*() const { return semaphore; };
|
||||
|
||||
HANDLE getHandle() const { return fence_handle; };
|
||||
|
||||
private:
|
||||
cl_semaphore_khr semaphore;
|
||||
ComPtr<ID3D12Fence> fence;
|
||||
HANDLE fence_handle;
|
||||
cl_device_id device;
|
||||
cl_context context;
|
||||
ComPtr<ID3D12Device> dx_device;
|
||||
|
||||
int releaseSemaphore() const
|
||||
{
|
||||
GET_PFN(device, clReleaseSemaphoreKHR);
|
||||
|
||||
if (semaphore)
|
||||
{
|
||||
clReleaseSemaphoreKHR(semaphore);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
static bool
|
||||
is_import_handle_available(cl_device_id device,
|
||||
const cl_external_memory_handle_type_khr handle_type)
|
||||
{
|
||||
int errcode = CL_SUCCESS;
|
||||
size_t import_types_size = 0;
|
||||
errcode =
|
||||
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, 0,
|
||||
nullptr, &import_types_size);
|
||||
if (errcode != CL_SUCCESS)
|
||||
{
|
||||
log_error("Could not query import semaphore handle types");
|
||||
return false;
|
||||
}
|
||||
std::vector<cl_external_semaphore_handle_type_khr> import_types(
|
||||
import_types_size / sizeof(cl_external_semaphore_handle_type_khr));
|
||||
errcode =
|
||||
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
|
||||
import_types_size, import_types.data(), nullptr);
|
||||
if (errcode != CL_SUCCESS)
|
||||
{
|
||||
log_error("Could not query import semaphore handle types");
|
||||
return false;
|
||||
}
|
||||
|
||||
return std::find(import_types.begin(), import_types.end(), handle_type)
|
||||
!= import_types.end();
|
||||
}
|
||||
@@ -0,0 +1,324 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "semaphore_dx_fence_base.h"
|
||||
|
||||
// Confirm that a signal followed by a wait in OpenCL will complete successfully
|
||||
REGISTER_TEST(test_external_semaphores_signal_wait)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper; // owns the D3D12 device used below

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API (extension entry points must be
    // queried at runtime; they are not exported by the ICD loader).
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    // The device must be able to import D3D12 fences as semaphores.
    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    // Enqueue the signal first, then the wait: the ordering is the behavior
    // under test -- the wait must complete once the signal has executed.
    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper signal_event;
    errcode = clEnqueueSignalSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
    test_error(errcode, "Failed to signal semaphore");

    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to wait semaphore");

    // A hang in clFinish here would mean the wait never observed the signal.
    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    // Verify that the events completed.
    test_assert_event_complete(signal_event);
    test_assert_event_complete(wait_event);

    return TEST_PASS;
}
|
||||
|
||||
// Confirm that a wait in OpenCL followed by a CPU signal in DX12 will complete
|
||||
// successfully
|
||||
REGISTER_TEST(test_external_semaphores_signal_dx_cpu)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    // Enqueue the OpenCL wait *before* signaling: the wait must stay pending
    // until the D3D12 side raises the fence value from the CPU.
    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");

    // CPU-side signal: ID3D12Fence::Signal sets the fence value directly
    // from the host, without going through a D3D12 command queue.
    log_info("Calling d3d12_fence->Signal()\n");
    const HRESULT hr = (*fence)->Signal(semaphore_payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");

    // clFinish hangs if the OpenCL wait never observes the CPU signal.
    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    test_assert_event_complete(wait_event);

    return TEST_PASS;
}
|
||||
|
||||
// Confirm that a wait in OpenCL followed by a GPU signal in DX12 will complete
|
||||
// successfully
|
||||
REGISTER_TEST(test_external_semaphores_signal_dx_gpu)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    // Enqueue the OpenCL wait first; it must stay pending until the D3D12
    // command queue signals the fence on the GPU timeline.
    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");

    // GPU-side signal: unlike the _dx_cpu variant, the fence is raised via
    // ID3D12CommandQueue::Signal, i.e. from the D3D12 GPU timeline.
    log_info("Calling d3d12_command_queue->Signal()\n");
    const HRESULT hr =
        dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");

    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    test_assert_event_complete(wait_event);

    return TEST_PASS;
}
|
||||
|
||||
// Confirm that interlocking waits between OpenCL and DX12 will complete
|
||||
// successfully
|
||||
REGISTER_TEST(test_external_semaphores_cl_dx_interlock)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    // Interlock pattern on one shared fence:
    //   D3D12 queue:  Wait(1)  -> Signal(2)
    //   OpenCL queue: Signal(1) -> Wait(2)
    // The D3D12 Wait(1) is released by OpenCL's Signal(1); D3D12's Signal(2)
    // then releases OpenCL's Wait(2). Either side deadlocking in clFinish
    // would indicate broken cross-API ordering.
    log_info("Calling d3d12_command_queue->Wait(1)\n");
    cl_semaphore_payload_khr semaphore_payload = 1;
    HRESULT hr =
        dx_wrapper.getDXCommandQueue()->Wait(*fence, semaphore_payload);
    test_error(FAILED(hr), "Failed to wait on D3D12 fence");

    log_info("Calling d3d12_command_queue->Signal(2)\n");
    hr = dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload + 1);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");

    log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n");
    clEventWrapper signal_event;
    errcode = clEnqueueSignalSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
    test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR");

    log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n");
    semaphore_payload += 1; // wait for the value D3D12 will signal
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");

    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    test_assert_event_complete(wait_event);
    test_assert_event_complete(signal_event);

    return TEST_PASS;
}
|
||||
|
||||
// Confirm that multiple waits in OpenCL followed by signals in DX12 and waits
|
||||
// in DX12 followed by signals in OpenCL complete successfully
|
||||
REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Block the host until |dx_fence| has reached |payload|. Factored out
    // because the identical CPU-side fence wait is needed for both fences
    // below. Returns CL_SUCCESS, or a non-zero value on failure.
    auto wait_for_fence_completion =
        [](ID3D12Fence *dx_fence, cl_semaphore_payload_khr payload) -> int {
        if (dx_fence->GetCompletedValue() < payload)
        {
            const HANDLE event_handle =
                CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
            // CreateEventEx returns NULL on failure; the handle must be
            // validated before it is handed to SetEventOnCompletion.
            test_error(event_handle == nullptr,
                       "Failed to create fence completion event");
            const HRESULT wait_hr =
                dx_fence->SetEventOnCompletion(payload, event_handle);
            if (FAILED(wait_hr))
            {
                // Close the event before bailing out so it is not leaked.
                CloseHandle(event_handle);
                log_error("Failed to set D3D12 fence event on completion\n");
                return TEST_FAIL;
            }
            WaitForSingleObject(event_handle, INFINITE);
            CloseHandle(event_handle);
        }
        return CL_SUCCESS;
    };

    // Import two independent D3D12 fences into OpenCL.
    const DirectXFenceWrapper fence_1(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore_1(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore_1.createSemaphoreFromFence(*fence_1),
               "Could not create semaphore");

    const DirectXFenceWrapper fence_2(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore_2(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore_2.createSemaphoreFromFence(*fence_2),
               "Could not create semaphore");

    const cl_semaphore_khr semaphore_list[] = { *semaphore_1, *semaphore_2 };
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    cl_semaphore_payload_khr semaphore_payload_list[] = {
        semaphore_payload, semaphore_payload + 1
    };

    // Phase 1: OpenCL waits on both semaphores; D3D12 signals both fences
    // (deliberately in the reverse order of the wait list).
    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(queue, 2, semaphore_list,
                                         semaphore_payload_list, 0, nullptr,
                                         &wait_event);
    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");

    log_info("Calling d3d12_command_queue->Signal()\n");
    HRESULT hr =
        dx_wrapper.getDXCommandQueue()->Signal(*fence_2, semaphore_payload + 1);
    test_error(FAILED(hr), "Failed to signal D3D12 fence 2");
    hr = dx_wrapper.getDXCommandQueue()->Signal(*fence_1, semaphore_payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence 1");

    // Phase 2: D3D12 waits on both fences with higher payloads that only
    // the OpenCL signals below will produce.
    log_info("Calling d3d12_command_queue->Wait() with different payloads\n");
    hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_1, semaphore_payload + 3);
    test_error(FAILED(hr), "Failed to wait on D3D12 fence 1");
    hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_2, semaphore_payload + 2);
    test_error(FAILED(hr), "Failed to wait on D3D12 fence 2");

    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    test_assert_event_complete(wait_event);

    semaphore_payload_list[0] = semaphore_payload + 3;
    semaphore_payload_list[1] = semaphore_payload + 2;

    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
    clEventWrapper signal_event;
    errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list,
                                           semaphore_payload_list, 0, nullptr,
                                           &signal_event);
    test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR");

    // Wait until the GPU has completed commands up to this fence point.
    log_info("Waiting for D3D12 command queue completion\n");
    test_error(wait_for_fence_completion(*fence_1, semaphore_payload_list[0]),
               "Failed waiting for D3D12 fence 1 completion");
    test_error(wait_for_fence_completion(*fence_2, semaphore_payload_list[1]),
               "Failed waiting for D3D12 fence 2 completion");

    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    test_assert_event_complete(signal_event);

    return TEST_PASS;
}
|
||||
@@ -0,0 +1,220 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "semaphore_dx_fence_base.h"
|
||||
|
||||
// Confirm that a wait followed by a signal in DirectX 12 using an exported
|
||||
// semaphore will complete successfully
|
||||
REGISTER_TEST(test_external_semaphores_export_dx_signal)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    GET_PFN(device, clGetSemaphoreInfoKHR);
    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);

    // Query the device's supported semaphore *export* handle types
    // (two-call pattern: size first, then the list).
    size_t export_types_size = 0;
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
                        nullptr, &export_types_size);
    test_error(errcode, "Could not query export semaphore handle types");
    std::vector<cl_external_semaphore_handle_type_khr> export_types(
        export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
                        export_types_size, export_types.data(), nullptr);
    test_error(errcode, "Could not query export semaphore handle types");

    if (std::find(export_types.begin(), export_types.end(),
                  CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
        == export_types.end())
    {
        // This path fails the test, so report it as an error rather than
        // info as before.
        log_error("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
                  "the supported export types\n");
        return TEST_FAIL;
    }

    // Create an OpenCL binary semaphore that is exportable as a D3D12 fence.
    constexpr cl_semaphore_properties_khr sem_props[] = {
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
        0
    };
    // NOTE(review): the semaphore and the handles below are leaked when any
    // of the intermediate test_error() paths returns early; acceptable for a
    // failing test process, but an RAII wrapper would be cleaner.
    cl_semaphore_khr semaphore =
        clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
    test_error(errcode, "Could not create semaphore");

    cl_bool is_exportable = CL_FALSE;
    errcode =
        clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
                              sizeof(is_exportable), &is_exportable, nullptr);
    test_error(errcode, "Could not get semaphore info");
    test_error(!is_exportable, "Semaphore is not exportable");

    // Enqueue the OpenCL wait first; it is released by the D3D12 CPU signal
    // on the exported fence below.
    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to wait semaphore");

    // Export the semaphore as an NT handle and re-open it as a D3D12 fence.
    HANDLE semaphore_handle = nullptr;
    errcode = clGetSemaphoreHandleForTypeKHR(
        semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
        sizeof(semaphore_handle), &semaphore_handle, nullptr);
    test_error(errcode, "Could not get semaphore handle");

    ID3D12Fence *fence = nullptr;
    // OpenSharedHandle returns an HRESULT; check it with FAILED() instead of
    // funneling it through the CL error path, where a positive success code
    // (e.g. S_FALSE) would be misreported as a failure.
    const HRESULT open_hr = dx_wrapper.getDXDevice()->OpenSharedHandle(
        semaphore_handle, IID_PPV_ARGS(&fence));
    test_error(FAILED(open_hr), "Could not open semaphore handle");

    log_info("Calling fence->Signal()\n");
    const HRESULT hr = fence->Signal(semaphore_payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");

    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    test_assert_event_complete(wait_event);

    // Release resources
    CloseHandle(semaphore_handle);
    test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
    fence->Release();

    return TEST_PASS;
}
|
||||
|
||||
// Confirm that a signal in OpenCL followed by a wait in DirectX 12 using an
|
||||
// exported semaphore will complete successfully
|
||||
REGISTER_TEST(test_external_semaphores_export_dx_wait)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    GET_PFN(device, clGetSemaphoreInfoKHR);
    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);

    // Query the device's supported semaphore *export* handle types
    // (two-call pattern: size first, then the list).
    size_t export_types_size = 0;
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
                        nullptr, &export_types_size);
    test_error(errcode, "Could not query export semaphore handle types");
    std::vector<cl_external_semaphore_handle_type_khr> export_types(
        export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
                        export_types_size, export_types.data(), nullptr);
    test_error(errcode, "Could not query export semaphore handle types");

    if (std::find(export_types.begin(), export_types.end(),
                  CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
        == export_types.end())
    {
        // This path fails the test, so report it as an error rather than
        // info as before.
        log_error("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
                  "the supported export types\n");
        return TEST_FAIL;
    }

    // Create an OpenCL binary semaphore that is exportable as a D3D12 fence.
    constexpr cl_semaphore_properties_khr sem_props[] = {
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
        0
    };
    // NOTE(review): the semaphore and the handles below are leaked when any
    // of the intermediate test_error() paths returns early; acceptable for a
    // failing test process, but an RAII wrapper would be cleaner.
    cl_semaphore_khr semaphore =
        clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
    test_error(errcode, "Could not create semaphore");

    cl_bool is_exportable = CL_FALSE;
    errcode =
        clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
                              sizeof(is_exportable), &is_exportable, nullptr);
    test_error(errcode, "Could not get semaphore info");
    test_error(!is_exportable, "Semaphore is not exportable");

    // Signal from OpenCL first; the D3D12 side then waits on the exported
    // fence for that payload.
    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper signal_event;
    errcode = clEnqueueSignalSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
    test_error(errcode, "Failed to signal semaphore");

    // Export the semaphore as an NT handle and re-open it as a D3D12 fence.
    HANDLE semaphore_handle = nullptr;
    errcode = clGetSemaphoreHandleForTypeKHR(
        semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
        sizeof(semaphore_handle), &semaphore_handle, nullptr);
    test_error(errcode, "Could not get semaphore handle");

    ID3D12Fence *fence = nullptr;
    // OpenSharedHandle returns an HRESULT; check it with FAILED() instead of
    // funneling it through the CL error path, where a positive success code
    // (e.g. S_FALSE) would be misreported as a failure.
    const HRESULT open_hr = dx_wrapper.getDXDevice()->OpenSharedHandle(
        semaphore_handle, IID_PPV_ARGS(&fence));
    test_error(FAILED(open_hr), "Could not open semaphore handle");

    log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n");
    HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence, semaphore_payload);
    test_error(FAILED(hr), "Failed to wait on D3D12 fence");

    // Block the host until the fence reaches the payload signaled by OpenCL.
    log_info("Calling WaitForSingleObject\n");
    if (fence->GetCompletedValue() < semaphore_payload)
    {
        const HANDLE event =
            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
        // CreateEventEx returns NULL on failure; validate before use.
        test_error(event == nullptr, "Failed to create completion event");
        hr = fence->SetEventOnCompletion(semaphore_payload, event);
        test_error(FAILED(hr), "Failed to set event on completion");
        WaitForSingleObject(event, INFINITE);
        CloseHandle(event);
    }

    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");

    test_assert_event_complete(signal_event);

    // Release resources
    CloseHandle(semaphore_handle);
    test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
    fence->Release();

    return TEST_PASS;
}
|
||||
@@ -0,0 +1,89 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "semaphore_dx_fence_base.h"
|
||||
|
||||
// Confirm that a wait without a semaphore payload list will return
|
||||
// CL_INVALID_VALUE
|
||||
REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
|
||||
{
|
||||
int errcode = CL_SUCCESS;
|
||||
const DirectXWrapper dx_wrapper;
|
||||
|
||||
REQUIRE_EXTENSION("cl_khr_external_semaphore");
|
||||
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
|
||||
|
||||
// Obtain pointers to semaphore's API
|
||||
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
|
||||
GET_PFN(device, clReleaseSemaphoreKHR);
|
||||
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
|
||||
|
||||
test_error(!is_import_handle_available(device,
|
||||
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
|
||||
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
|
||||
"supported import types");
|
||||
|
||||
// Import D3D12 fence into OpenCL
|
||||
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
|
||||
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
|
||||
test_error(semaphore.createSemaphoreFromFence(*fence),
|
||||
"Could not create semaphore");
|
||||
|
||||
log_info("Calling clEnqueueWaitSemaphoresKHR\n");
|
||||
errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
|
||||
nullptr, nullptr);
|
||||
test_assert_error(
|
||||
errcode == CL_INVALID_VALUE,
|
||||
"Unexpected error code returned from clEnqueueWaitSemaphores");
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
// Confirm that a signal without a semaphore payload list will return
|
||||
// CL_INVALID_VALUE
|
||||
REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
|
||||
{
|
||||
int errcode = CL_SUCCESS;
|
||||
const DirectXWrapper dx_wrapper;
|
||||
|
||||
REQUIRE_EXTENSION("cl_khr_external_semaphore");
|
||||
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
|
||||
|
||||
// Obtain pointers to semaphore's API
|
||||
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
|
||||
GET_PFN(device, clReleaseSemaphoreKHR);
|
||||
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
|
||||
|
||||
test_error(!is_import_handle_available(device,
|
||||
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
|
||||
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
|
||||
"supported import types");
|
||||
|
||||
// Import D3D12 fence into OpenCL
|
||||
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
|
||||
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
|
||||
test_error(semaphore.createSemaphoreFromFence(*fence),
|
||||
"Could not create semaphore");
|
||||
|
||||
log_info("Calling clEnqueueWaitSemaphoresKHR\n");
|
||||
errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
|
||||
nullptr, nullptr);
|
||||
test_assert_error(
|
||||
errcode == CL_INVALID_VALUE,
|
||||
"Unexpected error code returned from clEnqueueSignalSemaphores");
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "semaphore_dx_fence_base.h"
|
||||
|
||||
// Confirm that the CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR property is in the
|
||||
// properties returned by clGetSemaphoreInfo
|
||||
REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
|
||||
{
|
||||
int errcode = CL_SUCCESS;
|
||||
const DirectXWrapper dx_wrapper;
|
||||
|
||||
REQUIRE_EXTENSION("cl_khr_external_semaphore");
|
||||
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
|
||||
|
||||
// Obtain pointers to semaphore's API
|
||||
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
|
||||
GET_PFN(device, clReleaseSemaphoreKHR);
|
||||
GET_PFN(device, clGetSemaphoreInfoKHR);
|
||||
|
||||
test_error(!is_import_handle_available(device,
|
||||
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
|
||||
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
|
||||
"supported import types");
|
||||
|
||||
// Import D3D12 fence into OpenCL
|
||||
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
|
||||
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
|
||||
test_error(semaphore.createSemaphoreFromFence(*fence),
|
||||
"Could not create semaphore");
|
||||
|
||||
size_t properties_size_bytes = 0;
|
||||
errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR, 0,
|
||||
nullptr, &properties_size_bytes);
|
||||
test_error(errcode, "Could not get semaphore info");
|
||||
std::vector<cl_semaphore_properties_khr> semaphore_properties(
|
||||
properties_size_bytes / sizeof(cl_semaphore_properties_khr));
|
||||
errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
|
||||
properties_size_bytes,
|
||||
semaphore_properties.data(), nullptr);
|
||||
test_error(errcode, "Could not get semaphore info");
|
||||
|
||||
for (unsigned i = 0; i < semaphore_properties.size() - 1; i++)
|
||||
{
|
||||
if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR
|
||||
&& semaphore_properties[i + 1]
|
||||
== reinterpret_cast<cl_semaphore_properties_khr>(
|
||||
semaphore.getHandle()))
|
||||
{
|
||||
return TEST_PASS;
|
||||
}
|
||||
}
|
||||
log_error(
|
||||
"Failed to find the dx fence handle type in the semaphore properties");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
@@ -35,7 +35,7 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
|
||||
int wgid = get_group_id(0);
|
||||
int wgsize = get_local_size(0);
|
||||
|
||||
if (tid == 0) atomic_store(localPtr, 0);
|
||||
if (tid == 0) atomic_store_explicit(localPtr, 0, memory_order_relaxed, memory_scope_work_group);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
@@ -47,12 +47,12 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
|
||||
if ((wgid % 2) == 0)
|
||||
ptr = localPtr;
|
||||
|
||||
int inc = atomic_fetch_add(ptr, 1);
|
||||
int inc = atomic_fetch_add_explicit(ptr, 1, memory_order_relaxed, memory_scope_work_group);
|
||||
|
||||
// In the cases where the local memory ptr was used,
|
||||
// save off the final value.
|
||||
if ((wgid % 2) == 0 && inc == (wgsize-1))
|
||||
atomic_store(&globalPtr[wgid], inc);
|
||||
atomic_store_explicit(&globalPtr[wgid], inc, memory_order_relaxed, memory_scope_work_group);
|
||||
}
|
||||
)OpenCLC";
|
||||
|
||||
@@ -67,7 +67,7 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
|
||||
int wgid = get_group_id(0);
|
||||
int wgsize = get_local_size(0);
|
||||
|
||||
if (tid == 0) atomic_store(localPtr, 0);
|
||||
if (tid == 0) atomic_store_explicit(localPtr, 0, memory_order_relaxed, memory_scope_work_group);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
@@ -79,14 +79,17 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
|
||||
if ((tid % 2) == 0)
|
||||
ptr = localPtr;
|
||||
|
||||
atomic_fetch_add(ptr, 1);
|
||||
atomic_fetch_add_explicit(ptr, 1, memory_order_relaxed, memory_scope_work_group);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// In the cases where the local memory ptr was used,
|
||||
// save off the final value.
|
||||
if (tid == 0)
|
||||
atomic_store(&globalPtr[(wgid * 2) + 1], atomic_load(localPtr));
|
||||
atomic_store_explicit(&globalPtr[(wgid * 2) + 1],
|
||||
atomic_load_explicit(localPtr, memory_order_relaxed, memory_scope_work_group),
|
||||
memory_order_relaxed,
|
||||
memory_scope_work_group);
|
||||
}
|
||||
)OpenCLC";
|
||||
}
|
||||
|
||||
@@ -200,7 +200,7 @@ int main(int argc, const char *argv[])
|
||||
}
|
||||
}
|
||||
|
||||
if (argc > 1 && strcmp(argv[1], "-list") == 0)
|
||||
if (gListTests)
|
||||
{
|
||||
log_info("Available 2.x tests:\n");
|
||||
for (int i = 0; i < test_num; i++)
|
||||
|
||||
BIN
test_conformance/spir/half.zip
Normal file → Executable file
BIN
test_conformance/spir/half.zip
Normal file → Executable file
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -43,35 +43,37 @@
|
||||
//
|
||||
// Task
|
||||
//
|
||||
Task::Task(cl_device_id device, const char* options):
|
||||
m_devid(device) {
|
||||
if (options)
|
||||
m_options = options;
|
||||
Task::Task(cl_device_id device, const char* options): m_devid(device)
|
||||
{
|
||||
if (options) m_options = options;
|
||||
}
|
||||
|
||||
Task::~Task() {}
|
||||
|
||||
const char* Task::getErrorLog() const {
|
||||
return m_log.c_str();
|
||||
}
|
||||
const char* Task::getErrorLog() const { return m_log.c_str(); }
|
||||
|
||||
void Task::setErrorLog(cl_program prog) {
|
||||
void Task::setErrorLog(cl_program prog)
|
||||
{
|
||||
size_t len = 0;
|
||||
std::vector<char> log;
|
||||
|
||||
cl_int err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);
|
||||
if(err_code != CL_SUCCESS)
|
||||
cl_int err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG,
|
||||
0, NULL, &len);
|
||||
if (err_code != CL_SUCCESS)
|
||||
{
|
||||
m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &len) failed.\n";
|
||||
m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &len) "
|
||||
"failed.\n";
|
||||
return;
|
||||
}
|
||||
|
||||
log.resize(len, 0);
|
||||
|
||||
err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, len, &log[0], NULL);
|
||||
if(err_code != CL_SUCCESS)
|
||||
err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, len,
|
||||
&log[0], NULL);
|
||||
if (err_code != CL_SUCCESS)
|
||||
{
|
||||
m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &log) failed.\n";
|
||||
m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &log) "
|
||||
"failed.\n";
|
||||
return;
|
||||
}
|
||||
m_log.append(&log[0]);
|
||||
@@ -84,10 +86,11 @@ BuildTask::BuildTask(cl_program prog, cl_device_id dev, const char* options)
|
||||
: Task(dev, options), m_program(prog)
|
||||
{}
|
||||
|
||||
bool BuildTask::execute() {
|
||||
cl_int err_code = clBuildProgram(m_program, 0, NULL, m_options.c_str(), NULL, NULL);
|
||||
if(CL_SUCCESS == err_code)
|
||||
return true;
|
||||
bool BuildTask::execute()
|
||||
{
|
||||
cl_int err_code =
|
||||
clBuildProgram(m_program, 0, NULL, m_options.c_str(), NULL, NULL);
|
||||
if (CL_SUCCESS == err_code) return true;
|
||||
|
||||
setErrorLog(m_program);
|
||||
return false;
|
||||
@@ -96,8 +99,10 @@ bool BuildTask::execute() {
|
||||
//
|
||||
// SpirBuildTask
|
||||
//
|
||||
SpirBuildTask::SpirBuildTask(cl_program prog, cl_device_id dev, const char* options) :
|
||||
BuildTask(prog, dev, options) {}
|
||||
SpirBuildTask::SpirBuildTask(cl_program prog, cl_device_id dev,
|
||||
const char* options)
|
||||
: BuildTask(prog, dev, options)
|
||||
{}
|
||||
|
||||
//
|
||||
// CompileTask
|
||||
@@ -107,47 +112,43 @@ CompileTask::CompileTask(cl_program prog, cl_device_id dev, const char* options)
|
||||
: Task(dev, options), m_program(prog)
|
||||
{}
|
||||
|
||||
void CompileTask::addHeader(const char* hname, cl_program hprog) {
|
||||
void CompileTask::addHeader(const char* hname, cl_program hprog)
|
||||
{
|
||||
m_headers.push_back(std::make_pair(hname, hprog));
|
||||
}
|
||||
|
||||
const char* first(std::pair<const char*,cl_program>& p) {
|
||||
return p.first;
|
||||
}
|
||||
const char* first(std::pair<const char*, cl_program>& p) { return p.first; }
|
||||
|
||||
cl_program second(const std::pair<const char*, cl_program>& p) {
|
||||
cl_program second(const std::pair<const char*, cl_program>& p)
|
||||
{
|
||||
return p.second;
|
||||
}
|
||||
|
||||
bool CompileTask::execute() {
|
||||
bool CompileTask::execute()
|
||||
{
|
||||
// Generating the header names vector.
|
||||
std::vector<const char*> names;
|
||||
std::transform(m_headers.begin(), m_headers.end(), names.begin(), first);
|
||||
|
||||
// Generating the header programs vector.
|
||||
std::vector<cl_program> programs;
|
||||
std::transform(m_headers.begin(), m_headers.end(), programs.begin(), second);
|
||||
std::transform(m_headers.begin(), m_headers.end(), programs.begin(),
|
||||
second);
|
||||
|
||||
const char** h_names = NULL;
|
||||
const cl_program* h_programs = NULL;
|
||||
if (!m_headers.empty())
|
||||
{
|
||||
h_programs = &programs[0];
|
||||
h_names = &names[0];
|
||||
h_names = &names[0];
|
||||
}
|
||||
|
||||
// Compiling with the headers.
|
||||
cl_int err_code = clCompileProgram(
|
||||
m_program,
|
||||
1U,
|
||||
&m_devid,
|
||||
m_options.c_str(),
|
||||
m_headers.size(), // # of headers
|
||||
h_programs,
|
||||
h_names,
|
||||
NULL, NULL);
|
||||
if (CL_SUCCESS == err_code)
|
||||
return true;
|
||||
cl_int err_code =
|
||||
clCompileProgram(m_program, 1U, &m_devid, m_options.c_str(),
|
||||
m_headers.size(), // # of headers
|
||||
h_programs, h_names, NULL, NULL);
|
||||
if (CL_SUCCESS == err_code) return true;
|
||||
|
||||
setErrorLog(m_program);
|
||||
return false;
|
||||
@@ -156,8 +157,10 @@ bool CompileTask::execute() {
|
||||
//
|
||||
// SpirCompileTask
|
||||
//
|
||||
SpirCompileTask::SpirCompileTask(cl_program prog, cl_device_id dev, const char* options) :
|
||||
CompileTask(prog, dev, options) {}
|
||||
SpirCompileTask::SpirCompileTask(cl_program prog, cl_device_id dev,
|
||||
const char* options)
|
||||
: CompileTask(prog, dev, options)
|
||||
{}
|
||||
|
||||
|
||||
//
|
||||
@@ -169,13 +172,16 @@ LinkTask::LinkTask(cl_program* programs, int num_programs, cl_context ctxt,
|
||||
m_numPrograms(num_programs), m_context(ctxt)
|
||||
{}
|
||||
|
||||
bool LinkTask::execute() {
|
||||
bool LinkTask::execute()
|
||||
{
|
||||
cl_int err_code;
|
||||
int i;
|
||||
|
||||
for(i = 0; i < m_numPrograms; ++i)
|
||||
for (i = 0; i < m_numPrograms; ++i)
|
||||
{
|
||||
err_code = clCompileProgram(m_programs[i], 1, &m_devid, "-x spir -spir-std=1.2 -cl-kernel-arg-info", 0, NULL, NULL, NULL, NULL);
|
||||
err_code = clCompileProgram(m_programs[i], 1, &m_devid,
|
||||
"-x spir -spir-std=1.2 -cl-kernel-arg-info",
|
||||
0, NULL, NULL, NULL, NULL);
|
||||
if (CL_SUCCESS != err_code)
|
||||
{
|
||||
setErrorLog(m_programs[i]);
|
||||
@@ -183,91 +189,78 @@ bool LinkTask::execute() {
|
||||
}
|
||||
}
|
||||
|
||||
m_executable = clLinkProgram(m_context, 1, &m_devid, m_options.c_str(), m_numPrograms, m_programs, NULL, NULL, &err_code);
|
||||
if (CL_SUCCESS == err_code)
|
||||
return true;
|
||||
m_executable =
|
||||
clLinkProgram(m_context, 1, &m_devid, m_options.c_str(), m_numPrograms,
|
||||
m_programs, NULL, NULL, &err_code);
|
||||
if (CL_SUCCESS == err_code) return true;
|
||||
|
||||
if(m_executable) setErrorLog(m_executable);
|
||||
if (m_executable) setErrorLog(m_executable);
|
||||
return false;
|
||||
}
|
||||
|
||||
cl_program LinkTask::getExecutable() const {
|
||||
return m_executable;
|
||||
}
|
||||
cl_program LinkTask::getExecutable() const { return m_executable; }
|
||||
|
||||
LinkTask::~LinkTask() {
|
||||
if(m_executable) clReleaseProgram(m_executable);
|
||||
LinkTask::~LinkTask()
|
||||
{
|
||||
if (m_executable) clReleaseProgram(m_executable);
|
||||
}
|
||||
|
||||
//
|
||||
// KernelEnumerator
|
||||
//
|
||||
void KernelEnumerator::process(cl_program prog) {
|
||||
void KernelEnumerator::process(cl_program prog)
|
||||
{
|
||||
const size_t MAX_KERNEL_NAME = 64;
|
||||
size_t num_kernels;
|
||||
|
||||
cl_int err_code = clGetProgramInfo(
|
||||
prog,
|
||||
CL_PROGRAM_NUM_KERNELS,
|
||||
sizeof(size_t),
|
||||
&num_kernels,
|
||||
NULL
|
||||
);
|
||||
if (CL_SUCCESS != err_code)
|
||||
return;
|
||||
cl_int err_code = clGetProgramInfo(prog, CL_PROGRAM_NUM_KERNELS,
|
||||
sizeof(size_t), &num_kernels, NULL);
|
||||
if (CL_SUCCESS != err_code) return;
|
||||
|
||||
// Querying for the number of kernels.
|
||||
size_t buffer_len = sizeof(char)*num_kernels*MAX_KERNEL_NAME;
|
||||
size_t buffer_len = sizeof(char) * num_kernels * MAX_KERNEL_NAME;
|
||||
char* kernel_names = new char[buffer_len];
|
||||
memset(kernel_names, '\0', buffer_len);
|
||||
size_t str_len = 0;
|
||||
err_code = clGetProgramInfo(
|
||||
prog,
|
||||
CL_PROGRAM_KERNEL_NAMES,
|
||||
buffer_len,
|
||||
(void *)kernel_names,
|
||||
&str_len
|
||||
);
|
||||
if (CL_SUCCESS != err_code)
|
||||
return;
|
||||
err_code = clGetProgramInfo(prog, CL_PROGRAM_KERNEL_NAMES, buffer_len,
|
||||
(void*)kernel_names, &str_len);
|
||||
if (CL_SUCCESS != err_code) return;
|
||||
|
||||
//parsing the names and inserting them to the list
|
||||
// parsing the names and inserting them to the list
|
||||
std::string names(kernel_names);
|
||||
assert (str_len == 1+names.size() && "incompatible string lengths");
|
||||
assert(str_len == 1 + names.size() && "incompatible string lengths");
|
||||
size_t offset = 0;
|
||||
for(size_t i=0 ; i<names.size() ; ++i){
|
||||
//kernel names are separated by semi colons
|
||||
if (names[i] == ';'){
|
||||
m_kernels.push_back(names.substr(offset, i-offset));
|
||||
offset = i+1;
|
||||
for (size_t i = 0; i < names.size(); ++i)
|
||||
{
|
||||
// kernel names are separated by semi colons
|
||||
if (names[i] == ';')
|
||||
{
|
||||
m_kernels.push_back(names.substr(offset, i - offset));
|
||||
offset = i + 1;
|
||||
}
|
||||
}
|
||||
m_kernels.push_back(names.substr(offset, names.size()-offset));
|
||||
m_kernels.push_back(names.substr(offset, names.size() - offset));
|
||||
delete[] kernel_names;
|
||||
}
|
||||
|
||||
KernelEnumerator::KernelEnumerator(cl_program prog) {
|
||||
process(prog);
|
||||
}
|
||||
KernelEnumerator::KernelEnumerator(cl_program prog) { process(prog); }
|
||||
|
||||
KernelEnumerator::iterator KernelEnumerator::begin(){
|
||||
KernelEnumerator::iterator KernelEnumerator::begin()
|
||||
{
|
||||
return m_kernels.begin();
|
||||
}
|
||||
|
||||
KernelEnumerator::iterator KernelEnumerator::end(){
|
||||
return m_kernels.end();
|
||||
}
|
||||
KernelEnumerator::iterator KernelEnumerator::end() { return m_kernels.end(); }
|
||||
|
||||
size_t KernelEnumerator::size() const {
|
||||
return m_kernels.size();
|
||||
}
|
||||
size_t KernelEnumerator::size() const { return m_kernels.size(); }
|
||||
|
||||
/**
|
||||
Run the single test - run the test for both CL and SPIR versions of the kernel
|
||||
*/
|
||||
static bool run_test(cl_context context, cl_command_queue queue, cl_program clprog,
|
||||
cl_program bcprog, const std::string& kernel_name, std::string& err, const cl_device_id device,
|
||||
float ulps)
|
||||
static bool run_test(cl_context context, cl_command_queue queue,
|
||||
cl_program clprog, cl_program bcprog,
|
||||
const std::string& kernel_name, std::string& err,
|
||||
const cl_device_id device, float ulps)
|
||||
{
|
||||
WorkSizeInfo ws;
|
||||
TestResult cl_result;
|
||||
@@ -276,28 +269,29 @@ static bool run_test(cl_context context, cl_command_queue queue, cl_program clpr
|
||||
{
|
||||
// make sure that the kernel will be released before the program
|
||||
clKernelWrapper kernel = create_kernel_helper(clprog, kernel_name);
|
||||
// based on the kernel characteristics, we are generating and initializing the arguments for both phases (cl and bc executions)
|
||||
// based on the kernel characteristics, we are generating and
|
||||
// initializing the arguments for both phases (cl and bc executions)
|
||||
generate_kernel_data(context, kernel, ws, cl_result);
|
||||
bc_result.reset(cl_result.clone(context, ws, kernel, device));
|
||||
assert (compare_results(cl_result, *bc_result, ulps) && "not equal?");
|
||||
run_kernel( kernel, queue, ws, cl_result );
|
||||
assert(compare_results(cl_result, *bc_result, ulps) && "not equal?");
|
||||
run_kernel(kernel, queue, ws, cl_result);
|
||||
}
|
||||
// now, run the single BC test
|
||||
{
|
||||
// make sure that the kernel will be released before the program
|
||||
clKernelWrapper kernel = create_kernel_helper(bcprog, kernel_name);
|
||||
run_kernel( kernel, queue, ws, *bc_result );
|
||||
run_kernel(kernel, queue, ws, *bc_result);
|
||||
}
|
||||
|
||||
int error = clFinish(queue);
|
||||
if( CL_SUCCESS != error)
|
||||
if (CL_SUCCESS != error)
|
||||
{
|
||||
err = "clFinish failed\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
// compare the results
|
||||
if( !compare_results(cl_result, *bc_result, ulps) )
|
||||
if (!compare_results(cl_result, *bc_result, ulps))
|
||||
{
|
||||
err = " (result diff in kernel '" + kernel_name + "').";
|
||||
return false;
|
||||
@@ -308,16 +302,16 @@ static bool run_test(cl_context context, cl_command_queue queue, cl_program clpr
|
||||
/**
|
||||
Get the maximum relative error defined as ULP of floating-point math functions
|
||||
*/
|
||||
static float get_max_ulps(const char *test_name)
|
||||
static float get_max_ulps(const char* test_name)
|
||||
{
|
||||
float ulps = 0.f;
|
||||
// Get ULP values from math_brute_force functionList
|
||||
if (strstr(test_name, "math_kernel"))
|
||||
{
|
||||
for( size_t i = 0; i < functionListCount; i++ )
|
||||
for (size_t i = 0; i < functionListCount; i++)
|
||||
{
|
||||
char name[64];
|
||||
const Func *func = &functionList[ i ];
|
||||
const Func* func = &functionList[i];
|
||||
sprintf(name, ".%s_float", func->name);
|
||||
if (strstr(test_name, name))
|
||||
{
|
||||
@@ -336,16 +330,17 @@ static float get_max_ulps(const char *test_name)
|
||||
return ulps;
|
||||
}
|
||||
|
||||
TestRunner::TestRunner(EventHandler *success, EventHandler *failure,
|
||||
const OclExtensions& devExt):
|
||||
m_successHandler(success), m_failureHandler(failure), m_devExt(&devExt) {}
|
||||
TestRunner::TestRunner(EventHandler* success, EventHandler* failure,
|
||||
const OclExtensions& devExt)
|
||||
: m_successHandler(success), m_failureHandler(failure), m_devExt(&devExt)
|
||||
{}
|
||||
|
||||
/**
|
||||
Based on the test name build the cl file name, the bc file name and execute
|
||||
the kernel for both modes (cl and bc).
|
||||
*/
|
||||
bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
|
||||
const char *test_name, cl_uint size_t_width)
|
||||
bool TestRunner::runBuildTest(cl_device_id device, const char* folder,
|
||||
const char* test_name, cl_uint size_t_width)
|
||||
{
|
||||
int failures = 0;
|
||||
// Composing the name of the CSV file.
|
||||
@@ -365,28 +360,35 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
|
||||
cl_bool images3D = khrDb.isImages3DRequired(folder, test_name);
|
||||
|
||||
char deviceProfile[64];
|
||||
clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(deviceProfile), &deviceProfile, NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(deviceProfile),
|
||||
&deviceProfile, NULL);
|
||||
std::string device_profile(deviceProfile, 64);
|
||||
|
||||
if(images == CL_TRUE && checkForImageSupport(device) != 0)
|
||||
if (images == CL_TRUE && checkForImageSupport(device) != 0)
|
||||
{
|
||||
(*m_successHandler)(test_name, "");
|
||||
std::cout << "Skipped. (Cannot run on device due to Images is not supported)." << std::endl;
|
||||
std::cout
|
||||
<< "Skipped. (Cannot run on device due to Images is not supported)."
|
||||
<< std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
if(images3D == CL_TRUE && checkFor3DImageSupport(device) != 0)
|
||||
if (images3D == CL_TRUE && checkFor3DImageSupport(device) != 0)
|
||||
{
|
||||
(*m_successHandler)(test_name, "");
|
||||
std::cout << "Skipped. (Cannot run on device as 3D images are not supported)." << std::endl;
|
||||
std::cout
|
||||
<< "Skipped. (Cannot run on device as 3D images are not supported)."
|
||||
<< std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
OclExtensions requiredExt = khrDb.getRequiredExtensions(folder, test_name);
|
||||
if(!m_devExt->supports(requiredExt))
|
||||
if (!m_devExt->supports(requiredExt))
|
||||
{
|
||||
(*m_successHandler)(test_name, "");
|
||||
std::cout << "Skipped. (Cannot run on device due to missing extensions: " << m_devExt->get_missing(requiredExt) << " )." << std::endl;
|
||||
std::cout
|
||||
<< "Skipped. (Cannot run on device due to missing extensions: "
|
||||
<< m_devExt->get_missing(requiredExt) << " )." << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -409,17 +411,26 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
|
||||
|
||||
cl_device_fp_config gFloatCapabilities = 0;
|
||||
cl_int err;
|
||||
if ((err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(gFloatCapabilities), &gFloatCapabilities, NULL)))
|
||||
if ((err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
|
||||
sizeof(gFloatCapabilities), &gFloatCapabilities,
|
||||
NULL)))
|
||||
{
|
||||
log_info("Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n", err);
|
||||
log_info("Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
|
||||
err);
|
||||
}
|
||||
|
||||
if (strstr(test_name, "div_cr") || strstr(test_name, "sqrt_cr")) {
|
||||
if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0) {
|
||||
if (strstr(test_name, "div_cr") || strstr(test_name, "sqrt_cr"))
|
||||
{
|
||||
if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
|
||||
{
|
||||
(*m_successHandler)(test_name, "");
|
||||
std::cout << "Skipped. (Cannot run on device due to missing CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT property.)" << std::endl;
|
||||
std::cout << "Skipped. (Cannot run on device due to missing "
|
||||
"CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT property.)"
|
||||
<< std::endl;
|
||||
return true;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
bcoptions += " -cl-fp32-correctly-rounded-divide-sqrt";
|
||||
cloptions += " -cl-fp32-correctly-rounded-divide-sqrt";
|
||||
}
|
||||
@@ -427,33 +438,39 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
|
||||
|
||||
// Building the programs.
|
||||
BuildTask clBuild(clprog, device, cloptions.c_str());
|
||||
if (!clBuild.execute()) {
|
||||
if (!clBuild.execute())
|
||||
{
|
||||
std::cerr << clBuild.getErrorLog() << std::endl;
|
||||
(*m_failureHandler)(test_name, "");
|
||||
return false;
|
||||
}
|
||||
|
||||
SpirBuildTask bcBuild(bcprog, device, bcoptions.c_str());
|
||||
if (!bcBuild.execute()) {
|
||||
if (!bcBuild.execute())
|
||||
{
|
||||
std::cerr << bcBuild.getErrorLog() << std::endl;
|
||||
(*m_failureHandler)(test_name, "");
|
||||
return false;
|
||||
}
|
||||
|
||||
KernelEnumerator clkernel_enumerator(clprog),
|
||||
bckernel_enumerator(bcprog);
|
||||
if (clkernel_enumerator.size() != bckernel_enumerator.size()) {
|
||||
KernelEnumerator clkernel_enumerator(clprog), bckernel_enumerator(bcprog);
|
||||
if (clkernel_enumerator.size() != bckernel_enumerator.size())
|
||||
{
|
||||
std::cerr << "number of kernels in test" << test_name
|
||||
<< " doesn't match in bc and cl files" << std::endl;
|
||||
(*m_failureHandler)(test_name, "");
|
||||
return false;
|
||||
}
|
||||
KernelEnumerator::iterator it = clkernel_enumerator.begin(),
|
||||
e = clkernel_enumerator.end();
|
||||
e = clkernel_enumerator.end();
|
||||
while (it != e)
|
||||
{
|
||||
std::string kernel_name = *it++;
|
||||
std::string err;
|
||||
try
|
||||
{
|
||||
bool success = run_test(context, queue, clprog, bcprog, kernel_name, err, device, ulps);
|
||||
bool success = run_test(context, queue, clprog, bcprog, kernel_name,
|
||||
err, device, ulps);
|
||||
if (success)
|
||||
{
|
||||
log_info("kernel '%s' passed.\n", kernel_name.c_str());
|
||||
@@ -468,7 +485,8 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
|
||||
} catch (const std::runtime_error& err)
|
||||
{
|
||||
++failures;
|
||||
log_info("kernel '%s' failed: %s\n", kernel_name.c_str(), err.what());
|
||||
log_info("kernel '%s' failed: %s\n", kernel_name.c_str(),
|
||||
err.what());
|
||||
(*m_failureHandler)(test_name, kernel_name);
|
||||
}
|
||||
}
|
||||
@@ -476,4 +494,3 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
|
||||
log_info("%s %s\n", test_name, failures ? "FAILED" : "passed.");
|
||||
return failures == 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ else()
|
||||
add_custom_command(
|
||||
OUTPUT ${VULKAN_TEST_RESOURCES}/buffer.spv
|
||||
COMMAND ${Vulkan_glslang_binary}
|
||||
--target-env vulkan1.0
|
||||
--target-env vulkan1.1
|
||||
-o ${VULKAN_TEST_RESOURCES}/buffer.spv
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/buffer.comp
|
||||
DEPENDS buffer.comp
|
||||
@@ -35,7 +35,7 @@ else()
|
||||
add_custom_command(
|
||||
OUTPUT ${VULKAN_TEST_RESOURCES}/image2D_${GLSL_FORMAT}.spv
|
||||
COMMAND ${Vulkan_glslang_binary}
|
||||
--target-env vulkan1.0
|
||||
--target-env vulkan1.1
|
||||
-o ${VULKAN_TEST_RESOURCES}/image2D_${GLSL_FORMAT}.spv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/image2D_${GLSL_FORMAT}.comp
|
||||
DEPENDS image2D_${GLSL_FORMAT}.comp
|
||||
|
||||
Reference in New Issue
Block a user