Merge branch 'main' into cl_khr_unified_svm

This commit is contained in:
Ben Ashbaugh
2025-11-06 14:34:30 -08:00
63 changed files with 4934 additions and 2745 deletions

View File

@@ -25,11 +25,18 @@ if(USE_CL_EXPERIMENTAL)
add_definitions(-DCL_EXPERIMENTAL) add_definitions(-DCL_EXPERIMENTAL)
endif(USE_CL_EXPERIMENTAL) endif(USE_CL_EXPERIMENTAL)
option(SANITIZER_THREAD "Build with the thread sanitiser" OFF)
if (SANITIZER_THREAD)
add_compile_options(-fsanitize=thread)
add_link_options(-fsanitize=thread)
endif (SANITIZER_THREAD)
#----------------------------------------------------------- #-----------------------------------------------------------
# Default Configurable Test Set # Default Configurable Test Set
#----------------------------------------------------------- #-----------------------------------------------------------
option(D3D10_IS_SUPPORTED "Run DirectX 10 interop tests" OFF) option(D3D10_IS_SUPPORTED "Run DirectX 10 interop tests" OFF)
option(D3D11_IS_SUPPORTED "Run DirectX 11 interop tests" OFF) option(D3D11_IS_SUPPORTED "Run DirectX 11 interop tests" OFF)
option(D3D12_IS_SUPPORTED "Run DirectX 12 interop tests" OFF)
option(GL_IS_SUPPORTED "Run OpenGL interop tests" OFF) option(GL_IS_SUPPORTED "Run OpenGL interop tests" OFF)
option(GLES_IS_SUPPORTED "Run OpenGL ES interop tests" OFF) option(GLES_IS_SUPPORTED "Run OpenGL ES interop tests" OFF)
option(VULKAN_IS_SUPPORTED "Run Vulkan interop tests" OFF) option(VULKAN_IS_SUPPORTED "Run Vulkan interop tests" OFF)

View File

@@ -137,10 +137,10 @@ uint32_t get_channel_order_channel_count(cl_channel_order order)
case CL_RGx: return 2; case CL_RGx: return 2;
case CL_RGB: case CL_RGB:
case CL_RGBx: case CL_sRGB: return 3;
case CL_sRGB:
case CL_sRGBx: return 3;
case CL_RGBx:
case CL_sRGBx:
case CL_RGBA: case CL_RGBA:
case CL_ARGB: case CL_ARGB:
case CL_BGRA: case CL_BGRA:

View File

@@ -39,4 +39,20 @@ inline std::string str_sprintf(const std::string &str, Args... args)
return std::string(buffer.get(), buffer.get() + s - 1); return std::string(buffer.get(), buffer.get() + s - 1);
} }
// Returns the argument, converted to std::string.
// The return type of std::filesystem::path::u8string() was
// std::string in C++17, but became std::u8string in C++20.
// Use this method to wrap the result when a std::string
// is desired.
//
// Use a template with a specialization for std::string,
// so the generic template applies when std::u8string exists
// and is used.
template <typename STRING_TYPE>
inline std::string to_string(const STRING_TYPE &str)
{
return std::string(str.begin(), str.end());
}
inline std::string to_string(const std::string &str) { return str; }
#endif // STRING_HELPERS_H #endif // STRING_HELPERS_H

View File

@@ -56,6 +56,9 @@ if(VULKAN_IS_SUPPORTED)
add_subdirectory( common/vulkan_wrapper ) add_subdirectory( common/vulkan_wrapper )
add_subdirectory( vulkan ) add_subdirectory( vulkan )
endif() endif()
if(D3D12_IS_SUPPORTED)
add_subdirectory(common/directx_wrapper)
endif ()
file(GLOB CSV_FILES "opencl_conformance_tests_*.csv") file(GLOB CSV_FILES "opencl_conformance_tests_*.csv")

View File

@@ -1,6 +1,6 @@
// //
// Copyright (c) 2017 The Khronos Group Inc. // Copyright (c) 2017 The Khronos Group Inc.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
@@ -15,142 +15,167 @@
// //
#include "common.h" #include "common.h"
typedef struct { typedef struct
cl_int *pA; {
cl_int *pB; cl_int* pA;
cl_int *pC; cl_int* pB;
cl_int* pC;
} BufPtrs; } BufPtrs;
const char *set_kernel_exec_info_svm_ptrs_kernel[] = { const char* set_kernel_exec_info_svm_ptrs_kernel[] = {
"struct BufPtrs;\n" "struct BufPtrs;\n"
"\n" "\n"
"typedef struct {\n" "typedef struct {\n"
" __global int *pA;\n" " __global int *pA;\n"
" __global int *pB;\n" " __global int *pB;\n"
" __global int *pC;\n" " __global int *pC;\n"
"} BufPtrs;\n" "} BufPtrs;\n"
"\n" "\n"
"__kernel void set_kernel_exec_info_test(__global BufPtrs* pBufs)\n" "__kernel void set_kernel_exec_info_test(__global BufPtrs* pBufs)\n"
"{\n" "{\n"
" size_t i;\n" " size_t i;\n"
" i = get_global_id(0);\n" " i = get_global_id(0);\n"
" pBufs->pA[i]++;\n" " pBufs->pA[i]++;\n"
" pBufs->pB[i]++;\n" " pBufs->pB[i]++;\n"
" pBufs->pC[i]++;\n" " pBufs->pC[i]++;\n"
"}\n" "}\n"
}; };
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag. // Test that clSetKernelExecInfo works correctly with
// CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
// //
REGISTER_TEST(svm_set_kernel_exec_info_svm_ptrs) REGISTER_TEST(svm_set_kernel_exec_info_svm_ptrs)
{ {
clContextWrapper c = NULL; clContextWrapper c = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
// error = create_cl_objects(device, &set_kernel_exec_info_svm_ptrs_kernel[0], // error = create_cl_objects(device,
// &context, &program, &q, &num_devices, CL_DEVICE_SVM_FINE_GRAIN); // &set_kernel_exec_info_svm_ptrs_kernel[0], &context, &program, &q,
error = create_cl_objects(device, &set_kernel_exec_info_svm_ptrs_kernel[0], // &num_devices, CL_DEVICE_SVM_FINE_GRAIN);
&c, &program, &queues[0], &num_devices, error = create_cl_objects(device, &set_kernel_exec_info_svm_ptrs_kernel[0],
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); &c, &program, &queues[0], &num_devices,
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
if(error < 0) return -1; // fail test. if (error == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (error < 0) return -1; // fail test.
clKernelWrapper k = clCreateKernel(program, "set_kernel_exec_info_test", &error); clKernelWrapper k =
test_error(error, "clCreateKernel failed"); clCreateKernel(program, "set_kernel_exec_info_test", &error);
test_error(error, "clCreateKernel failed");
size_t size = num_elements*sizeof(int); size_t size = num_elements * sizeof(int);
//int* pA = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0); // int* pA = (int*) clSVMalloc(c, CL_MEM_READ_WRITE |
//int* pB = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0); // CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0); int* pB =
//int* pC = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0); // (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM,
int* pA = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0); // sizeof(int)*num_elements, 0); int* pC = (int*) clSVMalloc(c,
int* pB = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0); // CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM,
int* pC = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0); // sizeof(int)*num_elements, 0);
BufPtrs* pBuf = (BufPtrs*) clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0); int* pA = (int*)clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
int* pB = (int*)clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
int* pC = (int*)clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
BufPtrs* pBuf =
(BufPtrs*)clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0);
bool failed = false; bool failed = false;
{
clMemWrapper ba,bb,bc,bBuf;
ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error);
test_error(error, "clCreateBuffer failed");
bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error);
test_error(error, "clCreateBuffer failed");
bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error);
test_error(error, "clCreateBuffer failed");
bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf, &error);
test_error(error, "clCreateBuffer failed");
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(BufPtrs), 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
for(int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0;
pBuf->pA = pA;
pBuf->pB = pB;
pBuf->pC = pC;
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bBuf, pBuf, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clSetKernelArgSVMPointer(k, 0, pBuf);
test_error(error, "clSetKernelArg failed");
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(BufPtrs), pBuf);
test_error(error, "clSetKernelExecInfo failed");
size_t range = num_elements;
error = clEnqueueNDRangeKernel(queues[0], k, 1, NULL, &range, NULL, 0, NULL, NULL);
test_error(error,"clEnqueueNDRangeKernel failed");
error = clFinish(queues[0]);
test_error(error, "clFinish failed.");
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
for(int i = 0; i < num_elements; i++)
{ {
if(pA[i] + pB[i] + pC[i] != 3) clMemWrapper ba, bb, bc, bBuf;
failed = true; ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error);
test_error(error, "clCreateBuffer failed");
bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error);
test_error(error, "clCreateBuffer failed");
bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error);
test_error(error, "clCreateBuffer failed");
bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf,
&error);
test_error(error, "clCreateBuffer failed");
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, sizeof(BufPtrs), 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
for (int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0;
pBuf->pA = pA;
pBuf->pB = pB;
pBuf->pC = pC;
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bBuf, pBuf, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clSetKernelArgSVMPointer(k, 0, pBuf);
test_error(error, "clSetKernelArg failed");
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS,
sizeof(BufPtrs), pBuf);
test_error(error, "clSetKernelExecInfo failed");
size_t range = num_elements;
error = clEnqueueNDRangeKernel(queues[0], k, 1, NULL, &range, NULL, 0,
NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clFinish(queues[0]);
test_error(error, "clFinish failed.");
// Special case testing of unsetting previously set SVM pointers
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, 0, NULL);
test_error(error,
"Unsetting previously set SVM pointers using "
"clSetKernelExecInfo failed");
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
for (int i = 0; i < num_elements; i++)
{
if (pA[i] + pB[i] + pC[i] != 3) failed = true;
}
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
} }
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL); error = clFinish(queues[0]);
test_error(error, " clEnqueueUnmapMemObject failed."); test_error(error, " clFinish failed.");
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
}
error = clFinish(queues[0]); clSVMFree(c, pA);
test_error(error, " clFinish failed."); clSVMFree(c, pB);
clSVMFree(c, pC);
clSVMFree(c, pBuf);
clSVMFree(c, pA); if (failed) return -1;
clSVMFree(c, pB);
clSVMFree(c, pC);
clSVMFree(c, pBuf);
if(failed) return -1; return 0;
return 0;
} }

View File

@@ -16,92 +16,184 @@
#include "testBase.h" #include "testBase.h"
#include "harness/typeWrappers.h" #include "harness/typeWrappers.h"
#include <vector>
REGISTER_TEST(negative_create_command_queue) REGISTER_TEST(negative_create_command_queue)
{ {
cl_command_queue_properties device_props = 0; cl_int err = 0;
cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, clCreateCommandQueue(nullptr, device, 0, &err);
sizeof(device_props), &device_props, NULL); test_failure_error_ret(
test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed"); err, CL_INVALID_CONTEXT,
"clCreateCommandQueue should return CL_INVALID_CONTEXT when: \"context "
"is not a valid context\" using a nullptr",
TEST_FAIL);
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional property to clCreateCommandQueue(context, nullptr, 0, &err);
// clCreateCommandQueue, CL_QUEUE_PROFILING_ENABLE is mandatory. test_failure_error_ret(
const bool out_of_order_device_support = err, CL_INVALID_DEVICE,
device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; "clCreateCommandQueue should return CL_INVALID_DEVICE when: \"device "
if (out_of_order_device_support) "is not a valid device\" using a nullptr",
TEST_FAIL);
cl_device_id different_device = GetOpposingDevice(device);
if (different_device && device != different_device)
{ {
// Early return as we can't check correct error is returned for clCreateCommandQueue(context, different_device, 0, &err);
// unsupported property. test_failure_error_ret(
return TEST_PASS; err, CL_INVALID_DEVICE,
"clCreateCommandQueue should return CL_INVALID_DEVICE when: "
"\"device is not associated with context\"",
TEST_FAIL);
} }
// Try create a command queue with out-of-order property and check return cl_queue_properties invalid_property{ static_cast<cl_queue_properties>(
// code -1) };
cl_int test_error = CL_SUCCESS; clCreateCommandQueue(context, device, invalid_property, &err);
clCommandQueueWrapper test_queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &test_error);
test_failure_error_ret( test_failure_error_ret(
test_error, CL_INVALID_QUEUE_PROPERTIES, err, CL_INVALID_VALUE,
"clCreateCommandQueue should return CL_INVALID_QUEUE_PROPERTIES if " "clCreateCommandQueue should return CL_INVALID_VALUE when: \"values "
"values specified in properties are valid but are not supported by " "specified in properties are not valid\"",
"the "
"device.",
TEST_FAIL); TEST_FAIL);
cl_command_queue_properties device_queue_properties = 0;
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(device_queue_properties),
&device_queue_properties, nullptr);
test_error(err, "clGetDeviceInfo");
cl_command_queue_properties valid_properties[] = {
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE
};
cl_queue_properties property{ 0 };
bool missing_property = false;
// Iterate through all possible properties to find one which isn't supported
for (auto prop : valid_properties)
{
if ((device_queue_properties & prop) == 0)
{
missing_property = true;
property = prop;
break;
}
}
// This test can only run when a device does not support a property
if (missing_property)
{
clCreateCommandQueue(context, device, property, &err);
test_failure_error_ret(
err, CL_INVALID_QUEUE_PROPERTIES,
"clCreateCommandQueue should return CL_INVALID_QUEUE_PROPERTIES "
"when: \"values specified in properties are valid but are not "
"supported by the device\"",
TEST_FAIL);
}
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST_VERSION(negative_create_command_queue_with_properties, REGISTER_TEST_VERSION(negative_create_command_queue_with_properties,
Version(2, 0)) Version(2, 0))
{ {
cl_command_queue_properties device_props = 0; cl_int err = 0;
cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, clCreateCommandQueueWithProperties(nullptr, device, nullptr, &err);
sizeof(device_props), &device_props, NULL); test_failure_error_ret(
test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed"); err, CL_INVALID_CONTEXT,
"clCreateCommandQueueWithProperties should return CL_INVALID_CONTEXT "
"when: \"context is not a valid context\" using a nullptr",
TEST_FAIL);
cl_command_queue_properties device_on_host_props = 0; clCreateCommandQueueWithProperties(context, nullptr, nullptr, &err);
error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, test_failure_error_ret(
sizeof(device_on_host_props), &device_on_host_props, err, CL_INVALID_DEVICE,
NULL); "clCreateCommandQueueWithProperties should return CL_INVALID_DEVICE "
test_error(error, "when: \"device is not a valid device\" using a nullptr",
"clGetDeviceInfo for CL_DEVICE_QUEUE_ON_HOST_PROPERTIES failed"); TEST_FAIL);
if (device_on_host_props != device_props) cl_device_id different_device = GetOpposingDevice(device);
if (different_device && device != different_device)
{ {
log_error( clCreateCommandQueueWithProperties(context, different_device, nullptr,
"ERROR: CL_DEVICE_QUEUE_PROPERTIES and " &err);
"CL_DEVICE_QUEUE_ON_HOST_PROPERTIES properties should match\n"); test_failure_error_ret(
return TEST_FAIL; err, CL_INVALID_DEVICE,
"clCreateCommandQueueWithProperties should return "
"CL_INVALID_DEVICE when: \"device is not associated with context\"",
TEST_FAIL);
} }
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional host-queue cl_queue_properties invalid_property{ static_cast<cl_queue_properties>(
// property to clCreateCommandQueueWithProperties, -1) };
// CL_QUEUE_PROFILING_ENABLE is mandatory.
const bool out_of_order_device_support =
device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
if (out_of_order_device_support)
{
// Early return as we can't check correct error is returned for
// unsupported property.
return TEST_PASS;
}
// Try create a command queue with out-of-order property and check return // Depending on the OpenCL Version, there can be up to 2 properties which
// code // each take values, and the list should be terminated with a 0
cl_command_queue_properties queue_prop_def[] = { cl_queue_properties properties[] = { invalid_property, invalid_property, 0,
CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 0, 0 };
clCreateCommandQueueWithProperties(context, device, properties, &err);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clCreateCommandQueueWithProperties should return CL_INVALID_VALUE "
"when: \"values specified in properties are not valid\"",
TEST_FAIL);
cl_command_queue_properties device_queue_properties = 0;
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(device_queue_properties),
&device_queue_properties, nullptr);
test_error(err, "clGetDeviceInfo");
cl_command_queue_properties valid_properties[] = {
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE
}; };
properties[0] = CL_QUEUE_PROPERTIES;
cl_int test_error = CL_SUCCESS; bool missing_property = false;
clCommandQueueWrapper test_queue = clCreateCommandQueueWithProperties( // Iterate through all possible properties to find one which isn't supported
context, device, queue_prop_def, &test_error); for (auto property : valid_properties)
{
test_failure_error_ret(test_error, CL_INVALID_QUEUE_PROPERTIES, if ((device_queue_properties & property) == 0)
"clCreateCommandQueueWithProperties should " {
"return CL_INVALID_QUEUE_PROPERTIES if " missing_property = true;
"values specified in properties are valid but " properties[1] = property;
"are not supported by the " break;
"device.", }
TEST_FAIL); }
if (missing_property)
{
clCreateCommandQueueWithProperties(context, device, properties, &err);
test_failure_error_ret(
err, CL_INVALID_QUEUE_PROPERTIES,
"clCreateCommandQueueWithProperties should return "
"CL_INVALID_QUEUE_PROPERTIES when: \"values specified in "
"properties are valid but are not supported by the device\"",
TEST_FAIL);
}
else if (get_device_cl_version(device) >= Version(2, 0))
{
cl_uint max_size = -1;
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
sizeof(max_size), &max_size, nullptr);
test_error(err, "clGetDeviceInfo");
if (max_size > 0 && max_size < CL_UINT_MAX)
{
properties[0] = CL_QUEUE_PROPERTIES;
properties[1] =
CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
properties[2] = CL_QUEUE_SIZE;
properties[3] = max_size + 1;
clCreateCommandQueueWithProperties(context, device, properties,
&err);
if (err != CL_INVALID_VALUE && err != CL_INVALID_QUEUE_PROPERTIES)
{
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
"clCreateCommandQueueWithProperties should return "
"CL_INVALID_VALUE or CL_INVALID_QUEUE_PROPERTIES "
"when: \"values specified in properties are not "
"valid\" using a queue size greather than "
"CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE",
IGetErrorString(err),
"CL_INVALID_VALUE or CL_INVALID_QUEUE_PROPERTIES",
__FILE__, __LINE__);
return TEST_FAIL;
}
}
}
return TEST_PASS; return TEST_PASS;
} }
@@ -166,3 +258,236 @@ REGISTER_TEST(negative_create_command_queue_with_properties_khr)
TEST_FAIL); TEST_FAIL);
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST_VERSION(negative_set_default_device_command_queue, Version(2, 1))
{
cl_int err = 0;
if (get_device_cl_version(device) >= Version(3, 0))
{
cl_device_device_enqueue_capabilities device_capabilities = 0;
cl_int err = clGetDeviceInfo(
device, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
sizeof(device_capabilities), &device_capabilities, nullptr);
test_error(err, "clGetDeviceInfo");
if (((device_capabilities & CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT) == 0)
&& ((device_capabilities & CL_DEVICE_QUEUE_SUPPORTED) == 1))
{
const cl_queue_properties properties[] = {
CL_QUEUE_PROPERTIES,
CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_ON_DEVICE
| CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
0
};
clCommandQueueWrapper cmd_queue =
clCreateCommandQueueWithProperties(context, device, properties,
&err);
test_error(err, "clCreateCommandQueueWithProperties");
err = clSetDefaultDeviceCommandQueue(context, device, cmd_queue);
test_failure_error_ret(
err, CL_INVALID_OPERATION,
"clSetDefaultDeviceCommandQueue should return "
"CL_INVALID_OPERATION when \"device does not support a "
"replaceable default on-device queue\"",
TEST_FAIL);
}
}
err = clSetDefaultDeviceCommandQueue(nullptr, device, queue);
if (err != CL_INVALID_OPERATION && err != CL_INVALID_CONTEXT)
{
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
"clSetDefaultDeviceCommandQueue should return "
"CL_INVALID_OPERATION or CL_INVALID_CONTEXT when: \"context "
"is not a valid context\" using a nullptr",
IGetErrorString(err),
"CL_INVALID_OPERATION or CL_INVALID_CONTEXT", __FILE__,
__LINE__);
return TEST_FAIL;
}
err = clSetDefaultDeviceCommandQueue(context, nullptr, queue);
if (err != CL_INVALID_OPERATION && err != CL_INVALID_DEVICE)
{
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
"clSetDefaultDeviceCommandQueue should return "
"CL_INVALID_OPERATION or CL_INVALID_DEVICE when: \"device "
"is not a valid device\" using a nullptr",
IGetErrorString(err),
"CL_INVALID_OPERATION or CL_INVALID_DEVICE", __FILE__,
__LINE__);
return TEST_FAIL;
}
cl_device_id different_device = GetOpposingDevice(device);
if (different_device && device != different_device)
{
err = clSetDefaultDeviceCommandQueue(context, different_device, queue);
if (err != CL_INVALID_OPERATION && err != CL_INVALID_DEVICE)
{
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
"clSetDefaultDeviceCommandQueue should return "
"CL_INVALID_OPERATION or CL_INVALID_DEVICE when: "
"\"device is not associated with context\"",
IGetErrorString(err),
"CL_INVALID_OPERATION or CL_INVALID_DEVICE", __FILE__,
__LINE__);
return TEST_FAIL;
}
}
err = clSetDefaultDeviceCommandQueue(context, device, nullptr);
if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
{
log_error(
"ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
"clSetDefaultDeviceCommandQueue should return CL_INVALID_OPERATION "
"or CL_INVALID_COMMAND_QUEUE when: \"command_queue is not a valid "
"command-queue for device\" using a nullptr",
IGetErrorString(err),
"CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE", __FILE__,
__LINE__);
return TEST_FAIL;
}
{
constexpr cl_queue_properties props[] = {
CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
};
clCommandQueueWrapper not_on_device_queue =
clCreateCommandQueueWithProperties(context, device, props, &err);
test_error_fail(err, "clCreateCommandQueueWithProperties failed");
err = clSetDefaultDeviceCommandQueue(context, device,
not_on_device_queue);
if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
{
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
"clSetDefaultDeviceCommandQueue should return "
"CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: "
"\"command_queue is not a valid command-queue for "
"device\" using a command queue that is not on device",
IGetErrorString(err),
"CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE",
__FILE__, __LINE__);
}
}
return TEST_PASS;
}
REGISTER_TEST(negative_retain_command_queue)
{
cl_int err = clRetainCommandQueue(nullptr);
test_failure_error_ret(
err, CL_INVALID_COMMAND_QUEUE,
"clRetainCommandQueue should return CL_INVALID_COMMAND_QUEUE when: "
"\"command_queue is not a valid command-queue\" using a nullptr",
TEST_FAIL);
return TEST_PASS;
}
REGISTER_TEST(negative_release_command_queue)
{
cl_int err = clReleaseCommandQueue(nullptr);
test_failure_error_ret(
err, CL_INVALID_COMMAND_QUEUE,
"clReleaseCommandQueue should return CL_INVALID_COMMAND_QUEUE when: "
"\"command_queue is not a valid command-queue\" using a nullptr",
TEST_FAIL);
return TEST_PASS;
}
static bool device_supports_on_device_queue(cl_device_id deviceID)
{
cl_command_queue_properties device_queue_properties = 0;
if (get_device_cl_version(deviceID) >= Version(2, 0))
{
cl_int err = clGetDeviceInfo(
deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
sizeof(device_queue_properties), &device_queue_properties, nullptr);
test_error(err, "clGetDeviceInfo");
return (device_queue_properties
& CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
}
return false;
}
REGISTER_TEST(negative_get_command_queue_info)
{
cl_int err =
clGetCommandQueueInfo(nullptr, CL_QUEUE_CONTEXT, 0, nullptr, nullptr);
test_failure_error_ret(
err, CL_INVALID_COMMAND_QUEUE,
"clGetCommandQueueInfo should return CL_INVALID_COMMAND_QUEUE when: "
"\"command_queue is not a valid command-queue\" using a nullptr",
TEST_FAIL);
if (device_supports_on_device_queue(device))
{
const cl_queue_properties properties[] = {
CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
};
cl_int err = CL_INVALID_VALUE;
clCommandQueueWrapper cmd_queue = clCreateCommandQueueWithProperties(
context, device, properties, &err);
test_error(err, "clCreateCommandQueueWithProperties");
cl_uint queue_size = -1;
err = clGetCommandQueueInfo(cmd_queue, CL_QUEUE_SIZE,
sizeof(queue_size), &queue_size, nullptr);
test_failure_error_ret(err, CL_INVALID_COMMAND_QUEUE,
"clGetCommandQueueInfo should return "
"CL_INVALID_COMMAND_QUEUE when: \"command_queue "
"is not a valid command-queue for param_name\"",
TEST_FAIL);
}
constexpr cl_command_queue_info invalid_param = -1;
err = clGetCommandQueueInfo(queue, invalid_param, 0, nullptr, nullptr);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clGetCommandQueueInfo should return CL_INVALID_VALUE when: "
"\"param_name is not one of the supported values\"",
TEST_FAIL);
cl_uint ref_count = -1;
err = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, 0, &ref_count,
nullptr);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clGetCommandQueueInfo should return CL_INVALID_VALUE when: \"size in "
"bytes specified by param_value_size is < size of return type and "
"param_value is not a NULL value\"",
TEST_FAIL);
return TEST_PASS;
}
REGISTER_TEST_VERSION(negative_set_command_queue_property, Version(1, 0))
{
auto version = get_device_cl_version(device);
if (version >= Version(1, 1))
{
// Implementations are allowed to return an error for
// non-OpenCL 1.0 devices. In which case, skip the test.
return TEST_SKIPPED_ITSELF;
}
cl_queue_properties property{ CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE };
cl_int err = clSetCommandQueueProperty(nullptr, property, CL_TRUE, nullptr);
test_failure_error_ret(
err, CL_INVALID_COMMAND_QUEUE,
"clSetCommandQueueProperty should return CL_INVALID_COMMAND_QUEUE "
"when: \"command_queue is not a valid command-queue\" using a nullptr",
TEST_FAIL);
property = -1;
err = clSetCommandQueueProperty(queue, property, CL_TRUE, nullptr);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clSetCommandQueueProperty should return CL_INVALID_VALUE when: "
"\"values specified in properties are not valid\"",
TEST_FAIL);
return TEST_PASS;
}

View File

@@ -336,3 +336,161 @@ REGISTER_TEST(kernel_attributes)
} }
return success ? TEST_PASS : TEST_FAIL; return success ? TEST_PASS : TEST_FAIL;
} }
// Checks that a kernel built with __attribute__((reqd_work_group_size(X,Y,Z)))
// executes with exactly that local size when clEnqueueNDRangeKernel is given a
// NULL local_work_size, for work_dim = 1..3.  When the
// cl_khr_suggested_local_work_size extension is available, also checks that
// clGetKernelSuggestedLocalWorkSizeKHR reports the required size.
REGISTER_TEST(null_required_work_group_size)
{
    cl_int error = CL_SUCCESS;

    // Resolve the suggested-local-work-size entry point if the extension is
    // supported; otherwise leave the pointer null and skip those checks.
    clGetKernelSuggestedLocalWorkSizeKHR_fn
        clGetKernelSuggestedLocalWorkSizeKHR = nullptr;
    if (is_extension_available(device, "cl_khr_suggested_local_work_size"))
    {
        cl_platform_id platform = nullptr;
        error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
                                &platform, NULL);
        test_error(error, "clGetDeviceInfo for platform failed");
        clGetKernelSuggestedLocalWorkSizeKHR =
            (clGetKernelSuggestedLocalWorkSizeKHR_fn)
                clGetExtensionFunctionAddressForPlatform(
                    platform, "clGetKernelSuggestedLocalWorkSizeKHR");
        test_assert_error(clGetKernelSuggestedLocalWorkSizeKHR != nullptr,
                          "Couldn't get function pointer for "
                          "clGetKernelSuggestedLocalWorkSizeKHR");
    }

    // Query the device limits used below to decide whether each required
    // work-group size is actually launchable on this device.
    cl_uint device_max_dim = 0;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                            sizeof(device_max_dim), &device_max_dim, nullptr);
    test_error(error,
               "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
    test_assert_error(device_max_dim >= 3,
                      "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS must be at least 3!");

    std::vector<size_t> device_max_work_item_sizes(device_max_dim);
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                            sizeof(size_t) * device_max_dim,
                            device_max_work_item_sizes.data(), nullptr);
    // Fix: this error code was previously assigned but never checked.
    test_error(error,
               "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed");

    size_t device_max_work_group_size = 0;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                            sizeof(device_max_work_group_size),
                            &device_max_work_group_size, nullptr);
    test_error(error,
               "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed");

    // Destination buffer: receives the local size observed inside the kernel.
    clMemWrapper dst;
    dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
                         nullptr, &error);
    // Fix: clCreateBuffer's error code was previously never checked.
    test_error(error, "clCreateBuffer failed");

    // One attribute string per tested dimensionality; max_dim limits which
    // work_dim values are exercised for that attribute.
    struct KernelAttribInfo
    {
        std::string str;
        cl_uint max_dim;
    };
    std::vector<KernelAttribInfo> attribs;
    attribs.push_back({ "__attribute__((reqd_work_group_size(2,1,1)))", 1 });
    attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,1)))", 2 });
    attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,4)))", 3 });

    // Kernel body: the first work-item records get_local_size(0..2).
    const std::string body_str = R"(
__kernel void wg_size(__global int* dst)
{
if (get_global_id(0) == 0 &&
get_global_id(1) == 0 &&
get_global_id(2) == 0) {
dst[0] = get_local_size(0);
dst[1] = get_local_size(1);
dst[2] = get_local_size(2);
}
}
)";

    for (auto& attrib : attribs)
    {
        const std::string source_str = attrib.str + body_str;
        const char* source = source_str.c_str();
        clProgramWrapper program;
        clKernelWrapper kernel;
        error = create_single_kernel_helper(context, &program, &kernel, 1,
                                            &source, "wg_size");
        test_error(error, "Unable to create test kernel");

        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
        test_error(error, "clSetKernelArg failed");

        for (cl_uint work_dim = 1; work_dim <= attrib.max_dim; work_dim++)
        {
            // Local size the attribute requires for this work_dim.
            const cl_int expected[3] = { 2, work_dim >= 2 ? 3 : 1,
                                         work_dim >= 3 ? 4 : 1 };
            const size_t test_work_group_size =
                expected[0] * expected[1] * expected[2];

            // Skip configurations the device cannot launch.
            if ((size_t)expected[0] > device_max_work_item_sizes[0]
                || (size_t)expected[1] > device_max_work_item_sizes[1]
                || (size_t)expected[2] > device_max_work_item_sizes[2]
                || test_work_group_size > device_max_work_group_size)
            {
                log_info("Skipping test for work_dim = %u: required work group "
                         "size (%i, %i, %i) (total %zu) exceeds device max "
                         "work group size (%zu, %zu, %zu) (total %zu)\n",
                         work_dim, expected[0], expected[1], expected[2],
                         test_work_group_size, device_max_work_item_sizes[0],
                         device_max_work_item_sizes[1],
                         device_max_work_item_sizes[2],
                         device_max_work_group_size);
                continue;
            }

            const cl_int zero = 0;
            error = clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
                                        sizeof(expected), 0, nullptr, nullptr);
            // Fix: the fill's error code was previously never checked.
            test_error(error, "clEnqueueFillBuffer failed");

            // Global size is an exact multiple of the required local size in
            // every dimension, so a NULL local size must resolve to (2,3,4).
            const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
            error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr,
                                           global_work_size, nullptr, 0,
                                           nullptr, nullptr);
            test_error(error, "clEnqueueNDRangeKernel failed");

            cl_int results[3] = { -1, -1, -1 };
            error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(results),
                                        results, 0, nullptr, nullptr);
            test_error(error, "clEnqueueReadBuffer failed");

            if (results[0] != expected[0] || results[1] != expected[1]
                || results[2] != expected[2])
            {
                log_error("Executed local size mismatch with work_dim = %u: "
                          "Expected (%d,%d,%d) got (%d,%d,%d)\n",
                          work_dim, expected[0], expected[1], expected[2],
                          results[0], results[1], results[2]);
                return TEST_FAIL;
            }

            if (clGetKernelSuggestedLocalWorkSizeKHR != nullptr)
            {
                // The suggested local size must match the required size too.
                size_t suggested[3] = { 1, 1, 1 };
                error = clGetKernelSuggestedLocalWorkSizeKHR(
                    queue, kernel, work_dim, nullptr, global_work_size,
                    suggested);
                test_error(error,
                           "clGetKernelSuggestedLocalWorkSizeKHR failed");
                if ((cl_int)suggested[0] != expected[0]
                    || (cl_int)suggested[1] != expected[1]
                    || (cl_int)suggested[2] != expected[2])
                {
                    log_error("Suggested local size mismatch with work_dim = "
                              "%u: Expected (%d,%d,%d) got (%d,%d,%d)\n",
                              work_dim, expected[0], expected[1], expected[2],
                              (cl_int)suggested[0], (cl_int)suggested[1],
                              (cl_int)suggested[2]);
                    return TEST_FAIL;
                }
            }
        }
    }
    return TEST_PASS;
}

View File

@@ -1,6 +1,6 @@
// //
// Copyright (c) 2017 The Khronos Group Inc. // Copyright (c) 2017 The Khronos Group Inc.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
@@ -16,6 +16,8 @@
#include "testBase.h" #include "testBase.h"
#include "harness/typeWrappers.h" #include "harness/typeWrappers.h"
#include "harness/conversions.h" #include "harness/conversions.h"
#include "harness/stringHelpers.h"
#include <array>
#include <vector> #include <vector>
const char *sample_single_test_kernel[] = { const char *sample_single_test_kernel[] = {
@@ -87,6 +89,16 @@ const char *sample_two_kernel_program[] = {
"\n" "\n"
"}\n" }; "}\n" };
// OpenCL C source for a kernel taking a sampler_t argument; used by the
// negative clSetKernelArg sampler tests below. Each work-item reads one
// texel from |src| at (gid0, gid1) and stores it at dst[gid0].
const char *sample_sampler_size_test_kernel = R"(
__kernel void sampler_size_test(sampler_t sampler, __read_only image2d_t src, __global float4 *dst)
{
int tid = get_global_id(0);
int2 coord = (int2)(get_global_id(0), get_global_id(1));
float4 data = read_imagef(src, sampler, coord);
dst[tid] = data;
}
)";
const char *sample_mem_obj_size_test_kernel = R"( const char *sample_mem_obj_size_test_kernel = R"(
__kernel void mem_obj_size_test(__global int *src, __global int *dst) __kernel void mem_obj_size_test(__global int *src, __global int *dst)
{ {
@@ -117,6 +129,14 @@ const char *sample_write_only_image_test_kernel = R"(
} }
)"; )";
// printf-style template for a kernel with a single by-value argument. The
// three %s placeholders are filled (via str_sprintf) with: an optional
// extension pragma, the argument's type name, and the destination element
// type name (the same type name, used twice).
const char *sample_arg_size_test_kernel = R"(
%s
__kernel void arg_size_test(%s src, __global %s *dst)
{
dst[0]=src;
}
)";
REGISTER_TEST(get_kernel_info) REGISTER_TEST(get_kernel_info)
{ {
int error; int error;
@@ -734,6 +754,148 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
return TEST_PASS; return TEST_PASS;
} }
// Negative test: clSetKernelArg must fail with CL_INVALID_SAMPLER when a
// null arg_value is supplied for a kernel argument declared as sampler_t.
REGISTER_TEST(negative_invalid_arg_sampler)
{
    PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
    cl_int error = CL_SUCCESS;
    clProgramWrapper program;
    clKernelWrapper kernel;

    // Build the program containing a kernel with a sampler_t parameter and
    // fetch that kernel.
    error =
        create_single_kernel_helper(context, &program, nullptr, 1,
                                    &sample_sampler_size_test_kernel, nullptr);
    test_error(error, "Unable to build test program");

    kernel = clCreateKernel(program, "sampler_size_test", &error);
    test_error(error,
               "Unable to get sampler_size_test kernel for built program");

    // Passing a null sampler handle must be rejected.
    error = clSetKernelArg(kernel, 0, sizeof(cl_sampler), nullptr);
    test_failure_error_ret(
        error, CL_INVALID_SAMPLER,
        "clSetKernelArg is supposed to fail with CL_INVALID_SAMPLER when "
        "argument is declared to be of type sampler_t and the specified "
        "arg_value is not a valid sampler object",
        TEST_FAIL);

    return TEST_PASS;
}
// Negative test: clSetKernelArg must fail with CL_INVALID_ARG_SIZE when the
// size supplied for a sampler argument differs from sizeof(cl_sampler),
// in either direction.
REGISTER_TEST(negative_invalid_arg_sampler_size)
{
    PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
    cl_int error = CL_SUCCESS;
    clProgramWrapper program;
    clKernelWrapper kernel;

    // Build the sampler-taking kernel and create a valid sampler to pass.
    error =
        create_single_kernel_helper(context, &program, nullptr, 1,
                                    &sample_sampler_size_test_kernel, nullptr);
    test_error(error, "Unable to build test program");

    kernel = clCreateKernel(program, "sampler_size_test", &error);
    test_error(error,
               "Unable to get sampler_size_test kernel for built program");

    clSamplerWrapper sampler = clCreateSampler(
        context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
    test_error(error, "Unable to create sampler");

    // Oversized arg_size must be rejected.
    error = clSetKernelArg(kernel, 0, sizeof(cl_sampler) * 2, &sampler);
    test_failure_error_ret(
        error, CL_INVALID_ARG_SIZE,
        "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
        "argument is a sampler object and arg_size > sizeof(cl_sampler)",
        TEST_FAIL);

    // Undersized arg_size must be rejected too.
    error = clSetKernelArg(kernel, 0, sizeof(cl_sampler) / 2, &sampler);
    test_failure_error_ret(
        error, CL_INVALID_ARG_SIZE,
        "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
        "argument is a sampler object and arg_size < sizeof(cl_sampler)",
        TEST_FAIL);

    return TEST_PASS;
}
// Negative test: for every supported scalar/vector type and width, passing a
// size smaller than the declared by-value argument size to clSetKernelArg
// must produce CL_INVALID_ARG_SIZE.
REGISTER_TEST(negative_invalid_arg_size)
{
    std::vector<ExplicitType> exp_types = { kChar, kUChar, kShort, kUShort,
                                            kInt, kUInt, kLong, kULong,
                                            kFloat, kHalf, kDouble };
    bool fp16_supported = is_extension_available(device, "cl_khr_fp16");
    bool fp64_supported = is_extension_available(device, "cl_khr_fp64");

    // Scratch bytes large enough for the widest argument ever passed, and
    // the vector widths to exercise for each element type.
    std::vector<char> buf(sizeof(cl_ulong16), 0);
    const std::array<unsigned int, 5> sizes = { 1, 2, 4, 8, 16 };

    for (auto type : exp_types)
    {
        // Skip types this device/compiler cannot handle, and any type whose
        // name contains a space (not spellable as a kernel parameter).
        if ((type == kLong || type == kULong) && !gHasLong) continue;
        if (type == kDouble && !fp64_supported) continue;
        if (type == kHalf && !fp16_supported) continue;
        if (strchr(get_explicit_type_name(type), ' ') != 0) continue;

        for (auto width : sizes)
        {
            clProgramWrapper program;
            clKernelWrapper kernel;

            size_t destStride = get_explicit_type_size(type) * width;

            // Compose the OpenCL type name, e.g. "float4".
            std::ostringstream vecNameStr;
            vecNameStr << get_explicit_type_name(type);
            if (width != 1) vecNameStr << width;

            // fp16/fp64 kernels need the corresponding extension pragma.
            std::string ext_str;
            if (type == kDouble)
                ext_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
            if (type == kHalf)
                ext_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";

            auto vt_name = vecNameStr.str();
            std::string program_source =
                str_sprintf(std::string(sample_arg_size_test_kernel),
                            ext_str.c_str(), vt_name.c_str(), vt_name.c_str());
            const char *ptr = program_source.c_str();
            cl_int error = create_single_kernel_helper(
                context, &program, &kernel, 1, &ptr, "arg_size_test");
            test_error(error, "Unable to build test program!");

            // Supply half the declared size; CL_INVALID_ARG_SIZE is required.
            size_t reduced = destStride / 2;
            error = clSetKernelArg(kernel, 0, reduced, buf.data());
            if (error != CL_INVALID_ARG_SIZE)
            {
                std::stringstream sstr;
                sstr << "clSetKernelArg is supposed to fail "
                        "with CL_INVALID_ARG_SIZE with type "
                     << vecNameStr.str() << " and sizeof " << reduced
                     << std::endl;
                log_error("%s", sstr.str().c_str());
                return TEST_FAIL;
            }
        }
    }
    return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_mem_obj) REGISTER_TEST(negative_invalid_arg_mem_obj)
{ {
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;

View File

@@ -255,18 +255,12 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
} }
// Initialize our storage // Initialize our storage
std::unique_ptr<cl_int[]> l_bin_counts(new cl_int[number_of_bins]); std::vector<cl_int> l_bin_counts(number_of_bins);
if (!l_bin_counts)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
"bin_counters.\n");
return -1;
}
int i; int i;
for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0; for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
sizeof(cl_int) * number_of_bins, sizeof(cl_int) * number_of_bins,
l_bin_counts.get(), 0, NULL, NULL); l_bin_counts.data(), 0, NULL, NULL);
if (err) if (err)
{ {
log_error("add_index_bin_test FAILED to set initial values for " log_error("add_index_bin_test FAILED to set initial values for "
@@ -275,19 +269,12 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
return -1; return -1;
} }
std::unique_ptr<cl_int[]> values( std::vector<cl_int> values(number_of_bins * max_counts_per_bin);
new cl_int[number_of_bins * max_counts_per_bin]);
if (!values)
{
log_error(
"add_index_bin_test FAILED to allocate initial values for bins.\n");
return -1;
}
for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1; for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
err = clEnqueueWriteBuffer(queue, bins, true, 0, err = clEnqueueWriteBuffer(queue, bins, true, 0,
sizeof(cl_int) * number_of_bins sizeof(cl_int) * number_of_bins
* max_counts_per_bin, * max_counts_per_bin,
values.get(), 0, NULL, NULL); values.data(), 0, NULL, NULL);
if (err) if (err)
{ {
log_error( log_error(
@@ -296,13 +283,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
return -1; return -1;
} }
std::unique_ptr<cl_int[]> l_bin_assignments(new cl_int[number_of_items]); std::vector<cl_int> l_bin_assignments(number_of_items);
if (!l_bin_assignments)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
"l_bin_assignments.\n");
return -1;
}
for (i = 0; i < number_of_items; i++) for (i = 0; i < number_of_items; i++)
{ {
int bin = random_in_range(0, number_of_bins - 1, d); int bin = random_in_range(0, number_of_bins - 1, d);
@@ -326,7 +307,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
} }
err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
sizeof(cl_int) * number_of_items, sizeof(cl_int) * number_of_items,
l_bin_assignments.get(), 0, NULL, NULL); l_bin_assignments.data(), 0, NULL, NULL);
if (err) if (err)
{ {
log_error("add_index_bin_test FAILED to set initial values for " log_error("add_index_bin_test FAILED to set initial values for "
@@ -355,34 +336,22 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
return -1; return -1;
} }
std::unique_ptr<cl_int[]> final_bin_assignments( std::vector<cl_int> final_bin_assignments(number_of_bins
new cl_int[number_of_bins * max_counts_per_bin]); * max_counts_per_bin);
if (!final_bin_assignments)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
"final_bin_assignments.\n");
return -1;
}
err = clEnqueueReadBuffer(queue, bins, true, 0, err = clEnqueueReadBuffer(queue, bins, true, 0,
sizeof(cl_int) * number_of_bins sizeof(cl_int) * number_of_bins
* max_counts_per_bin, * max_counts_per_bin,
final_bin_assignments.get(), 0, NULL, NULL); final_bin_assignments.data(), 0, NULL, NULL);
if (err) if (err)
{ {
log_error("add_index_bin_test FAILED to read back bins: %d\n", err); log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
return -1; return -1;
} }
std::unique_ptr<cl_int[]> final_bin_counts(new cl_int[number_of_bins]); std::vector<cl_int> final_bin_counts(number_of_bins);
if (!final_bin_counts)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
"final_bin_counts.\n");
return -1;
}
err = clEnqueueReadBuffer(queue, bin_counters, true, 0, err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
sizeof(cl_int) * number_of_bins, sizeof(cl_int) * number_of_bins,
final_bin_counts.get(), 0, NULL, NULL); final_bin_counts.data(), 0, NULL, NULL);
if (err) if (err)
{ {
log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",

View File

@@ -193,15 +193,38 @@ int AtomicTypeInfo::IsSupported(cl_device_id device)
template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;} template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;}
template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;} template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;} template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() {return 0;} template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; }
template<> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue() {return CL_FLT_MIN;} template <> cl_half AtomicTypeExtendedInfo<cl_half>::MinValue()
template<> cl_double AtomicTypeExtendedInfo<cl_double>::MinValue() {return CL_DBL_MIN;} {
return cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode);
}
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
{
return -CL_FLT_MAX;
}
template <> cl_double AtomicTypeExtendedInfo<cl_double>::MinValue()
{
return -CL_DBL_MAX;
}
template<> cl_int AtomicTypeExtendedInfo<cl_int>::MaxValue() {return CL_INT_MAX;} template <> cl_int AtomicTypeExtendedInfo<cl_int>::MaxValue()
template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue() {return CL_UINT_MAX;} {
return CL_INT_MAX;
}
template <> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue()
{
return CL_UINT_MAX;
}
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;} template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;} template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
template<> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue() {return CL_FLT_MAX;} template <> cl_half AtomicTypeExtendedInfo<cl_half>::MaxValue()
{
return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode);
}
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
{
return CL_FLT_MAX;
}
template<> cl_double AtomicTypeExtendedInfo<cl_double>::MaxValue() {return CL_DBL_MAX;} template<> cl_double AtomicTypeExtendedInfo<cl_double>::MaxValue() {return CL_DBL_MAX;}
cl_int getSupportedMemoryOrdersAndScopes( cl_int getSupportedMemoryOrdersAndScopes(

View File

@@ -79,6 +79,7 @@ extern cl_device_atomic_capabilities gAtomicMemCap,
extern cl_half_rounding_mode gHalfRoundingMode; extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported; extern bool gFloatAtomicsSupported;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps; extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
extern cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps;
extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps; extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
extern const char * extern const char *
@@ -893,15 +894,16 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
header += std::string("__global volatile ") + aTypeName + " destMemory[" header += std::string("__global volatile ") + aTypeName + " destMemory["
+ ss.str() + "] = {\n"; + ss.str() + "] = {\n";
ss.str(""); ss.str("");
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
== TYPE_ATOMIC_FLOAT) == TYPE_ATOMIC_FLOAT)
ss << std::setprecision(10) << _startValue; ss << std::setprecision(10) << _startValue;
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
== TYPE_ATOMIC_HALF) == TYPE_ATOMIC_HALF)
ss << static_cast<HostDataType>( ss << cl_half_to_float(static_cast<cl_half>(_startValue));
cl_half_to_float(static_cast<cl_half>(_startValue)));
else else
ss << _startValue; ss << _startValue;
for (cl_uint i = 0; i < maxNumDestItems; i++) for (cl_uint i = 0; i < maxNumDestItems; i++)
{ {
if (aTypeName == "atomic_flag") if (aTypeName == "atomic_flag")

View File

@@ -18,7 +18,6 @@
#include "harness/testHarness.h" #include "harness/testHarness.h"
#include <mutex> #include <mutex>
#include "CL/cl_half.h" #include "CL/cl_half.h"
#ifdef WIN32 #ifdef WIN32
@@ -99,7 +98,19 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c, CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order) TExplicitMemoryOrderType order)
{ {
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)),
gHalfRoundingMode);
return old_value;
}
else if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
{ {
static std::mutex mx; static std::mutex mx;
std::lock_guard<std::mutex> lock(mx); std::lock_guard<std::mutex> lock(mx);
@@ -109,7 +120,7 @@ CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingTyp
} }
else else
{ {
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
return InterlockedExchangeAdd(a, c); return InterlockedExchangeAdd(a, c);
#elif defined(__GNUC__) #elif defined(__GNUC__)
return __sync_fetch_and_add(a, c); return __sync_fetch_and_add(a, c);
@@ -124,7 +135,15 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order) TExplicitMemoryOrderType order)
{ {
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>) if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a -= c;
return old_value;
}
else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{ {
static std::mutex mx; static std::mutex mx;
std::lock_guard<std::mutex> lock(mx); std::lock_guard<std::mutex> lock(mx);
@@ -173,14 +192,30 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
TExplicitMemoryOrderType order_failure) TExplicitMemoryOrderType order_failure)
{ {
CorrespondingType tmp; CorrespondingType tmp;
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile cl_half *>(a);
if (cl_half_to_float(tmp) == cl_half_to_float(*expected))
{
*reinterpret_cast<volatile cl_half *>(a) = desired;
return true;
}
*expected = tmp;
}
else if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{ {
static std::mutex mtx; static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx); std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile float *>(a); tmp = *reinterpret_cast<volatile float *>(a);
if (tmp == *expected) if (tmp == *expected)
{ {
*reinterpret_cast<volatile float *>(a) = desired; *a = desired;
return true; return true;
} }
*expected = tmp; *expected = tmp;
@@ -188,7 +223,6 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
else else
{ {
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
tmp = InterlockedCompareExchange(a, desired, *expected); tmp = InterlockedCompareExchange(a, desired, *expected);
#elif defined(__GNUC__) #elif defined(__GNUC__)
tmp = __sync_val_compare_and_swap(a, *expected, desired); tmp = __sync_val_compare_and_swap(a, *expected, desired);

View File

@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
bool gFloatAtomicsSupported = false; bool gFloatAtomicsSupported = false;
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
test_status InitCL(cl_device_id device) { test_status InitCL(cl_device_id device) {
@@ -134,6 +135,14 @@ test_status InitCL(cl_device_id device) {
{ {
gFloatAtomicsSupported = true; gFloatAtomicsSupported = true;
if (is_extension_available(device, "cl_khr_fp64"))
{
cl_int error = clGetDeviceInfo(
device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
}
cl_int error = clGetDeviceInfo( cl_int error = clGetDeviceInfo(
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr); sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);

View File

@@ -417,7 +417,7 @@ public:
correct = true; correct = true;
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
{ {
if constexpr (std::is_same<HostDataType, cl_half>::value) if constexpr (std::is_same_v<HostDataType, cl_half>)
{ {
HostDataType test = cl_half_from_float(static_cast<float>(i), HostDataType test = cl_half_from_float(static_cast<float>(i),
gHalfRoundingMode); gHalfRoundingMode);
@@ -1163,13 +1163,30 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak)
num_elements, true); num_elements, true);
} }
// Kahan (compensated) summation: accumulates the elements of |nums| in double
// precision while carrying a running compensation term, greatly reducing the
// round-off error of a naive left-to-right sum.
//
// Generalized from a double-only specialization: the previous primary
// template silently returned 0.0 for every non-double element type.
// Elements are widened with static_cast<double>, so this is meaningful only
// for types whose numeric value is their arithmetic value (float, double,
// integers) — NOT for cl_half, whose cl_ushort representation holds bit
// patterns rather than values.
template <typename T> double kahan_sum(const std::vector<T> &nums)
{
    double sum = 0.0;
    double compensation = 0.0;
    for (const T &num : nums)
    {
        // Fold in the error left over from the previous addition.
        double y = static_cast<double>(num) - compensation;
        double t = sum + y;
        // (t - sum) is the part of y actually absorbed into the sum; the
        // difference from y is the newly lost low-order part.
        compensation = (t - sum) - y;
        sum = t;
    }
    return sum;
}
template <typename HostAtomicType, typename HostDataType> template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd class CBasicTestFetchAdd
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> { : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
double min_range; double min_range;
double max_range; double max_range;
double max_error_fp32; double max_error;
std::vector<HostDataType> ref_vals; std::vector<HostDataType> ref_vals;
public: public:
@@ -1182,26 +1199,98 @@ public:
CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM), useSVM),
min_range(-999.0), max_range(999.0), max_error_fp32(0.0) min_range(-999.0), max_range(999.0), max_error(0.0)
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
StartValue(0.f); StartValue((HostDataType)0.0);
CBasicTestMemOrderScope<HostAtomicType, CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false); HostDataType>::OldValueCheck(false);
} }
} }
template <typename Iterator> float accum_halfs(Iterator begin, Iterator end)
{
cl_half sum = 0;
for (auto it = begin; it != end; ++it)
{
sum = cl_half_from_float(cl_half_to_float(sum)
+ cl_half_to_float(*it),
gHalfRoundingMode);
}
return cl_half_to_float(sum);
}
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override MTdata d) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
if (threadCount > ref_vals.size()) if (threadCount > ref_vals.size())
{ {
ref_vals.resize(threadCount); ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = get_random_float(min_range, max_range, d); ref_vals[i] = cl_half_from_float(
get_random_float(min_range, max_range, d),
gHalfRoundingMode);
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size());
// Estimate highest possible summation error for given set.
std::vector<float> sums;
std::sort(ref_vals.begin(), ref_vals.end(),
[](cl_half a, cl_half b) {
return cl_half_to_float(a) < cl_half_to_float(b);
});
sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
std::sort(ref_vals.begin(), ref_vals.end(),
[](cl_half a, cl_half b) {
return std::abs(cl_half_to_float(a))
< std::abs(cl_half_to_float(b));
});
float precise = 0.f;
for (auto elem : ref_vals) precise += cl_half_to_float(elem);
sums.push_back(precise);
sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
std::sort(sums.begin(), sums.end());
max_error = std::abs(sums.front() - sums.back());
// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
sizeof(HostDataType) * ref_vals.size());
}
else
{
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
}
return true;
}
else if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
ref_vals[i] =
get_random_double(min_range, max_range, d);
else
ref_vals[i] = get_random_float(min_range, max_range, d);
memcpy(startRefValues, ref_vals.data(), memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size()); sizeof(HostDataType) * ref_vals.size());
@@ -1216,12 +1305,17 @@ public:
sums.push_back( sums.push_back(
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
std::sort( std::sort(ref_vals.begin(), ref_vals.end(),
ref_vals.begin(), ref_vals.end(), [](HostDataType a, HostDataType b) {
[](float a, float b) { return std::abs(a) < std::abs(b); }); return std::abs(a) < std::abs(b);
});
double precise = 0.0; double precise = 0.0;
for (auto elem : ref_vals) precise += double(elem); if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
precise = kahan_sum(ref_vals);
else
for (auto elem : ref_vals) precise += double(elem);
sums.push_back(precise); sums.push_back(precise);
sums.push_back( sums.push_back(
@@ -1231,8 +1325,7 @@ public:
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
std::sort(sums.begin(), sums.end()); std::sort(sums.begin(), sums.end());
max_error_fp32 = max_error = std::abs(sums.front() - sums.back());
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
// restore unsorted order // restore unsorted order
memcpy(ref_vals.data(), startRefValues, memcpy(ref_vals.data(), startRefValues,
@@ -1252,7 +1345,10 @@ public:
std::string memoryOrderScope = MemoryOrderScopeStr(); std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
return " atomic_fetch_add" + postfix + "(&destMemory[0], (" return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]" + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1286,7 +1382,10 @@ public:
volatile HostAtomicType *destMemory, volatile HostAtomicType *destMemory,
HostDataType *oldValues) override HostDataType *oldValues) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid], host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder()); MemoryOrder());
@@ -1312,7 +1411,23 @@ public:
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
expected = StartValue(); expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (whichDestValue == 0)
{
for (cl_uint i = 0; i < threadCount; i++)
{
expected = cl_half_from_float(
cl_half_to_float(expected)
+ cl_half_to_float(startRefValues[i]),
gHalfRoundingMode);
}
}
}
else if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
@@ -1331,12 +1446,22 @@ public:
const std::vector<HostAtomicType> &testValues, const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value) if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
return std::abs((HOST_ATOMIC_FLOAT)expected return std::abs(cl_half_to_float(expected)
- cl_half_to_float(testValues[whichDestValue]))
> max_error;
}
else if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
return std::abs((HostDataType)expected
- testValues[whichDestValue]) - testValues[whichDestValue])
> max_error_fp32; > max_error;
} }
return CBasicTestMemOrderScope< return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
@@ -1346,7 +1471,10 @@ public:
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override HostAtomicType *finalValues) override
{ {
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
correct = true; correct = true;
for (cl_uint i = 1; i < threadCount; i++) for (cl_uint i = 1; i < threadCount; i++)
@@ -1369,7 +1497,28 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context, int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override cl_command_queue queue) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (LocalMemory() if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1385,7 +1534,10 @@ public:
} }
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
return threadCount; return threadCount;
} }
@@ -1420,6 +1572,16 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported) if (gFloatAtomicsSupported)
{ {
CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float( CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM); TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST( EXECUTE_TEST(
@@ -1506,7 +1668,10 @@ public:
useSVM), useSVM),
min_range(-999.0), max_range(999.0), max_error(0.0) min_range(-999.0), max_range(999.0), max_error(0.0)
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
{ {
StartValue(0); StartValue(0);
CBasicTestMemOrderScope<HostAtomicType, CBasicTestMemOrderScope<HostAtomicType,
@@ -1514,6 +1679,13 @@ public:
} }
} }
template <typename Iterator> template <typename Iterator>
HostDataType subtract(Iterator begin, Iterator end)
{
HostDataType res = 0;
for (auto it = begin; it != end; ++it) res = res - *it;
return res;
}
template <typename Iterator>
float subtract_halfs(Iterator begin, Iterator end) float subtract_halfs(Iterator begin, Iterator end)
{ {
cl_half res = 0; cl_half res = 0;
@@ -1528,12 +1700,53 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override MTdata d) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (threadCount > ref_vals.size()) if (threadCount > ref_vals.size())
{ {
ref_vals.resize(threadCount); ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = get_random_float(min_range, max_range, d);
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size());
// Estimate highest possible subtraction error for given set.
std::vector<HostDataType> sums;
std::sort(ref_vals.begin(), ref_vals.end());
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(
ref_vals.begin(), ref_vals.end(),
[](float a, float b) { return std::abs(a) < std::abs(b); });
double precise = 0.0;
for (auto elem : ref_vals) precise += double(elem);
sums.push_back(precise);
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(sums.begin(), sums.end());
max_error =
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
sizeof(HostDataType) * ref_vals.size());
}
else
{
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
}
return true;
}
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = cl_half_from_float( ref_vals[i] = cl_half_from_float(
get_random_float(min_range, max_range, d), get_random_float(min_range, max_range, d),
@@ -1563,7 +1776,6 @@ public:
float precise = 0.f; float precise = 0.f;
for (auto elem : ref_vals) precise -= cl_half_to_float(elem); for (auto elem : ref_vals) precise -= cl_half_to_float(elem);
sums.push_back(precise); sums.push_back(precise);
sums.push_back( sums.push_back(
subtract_halfs(ref_vals.begin(), ref_vals.end())); subtract_halfs(ref_vals.begin(), ref_vals.end()));
sums.push_back( sums.push_back(
@@ -1571,7 +1783,6 @@ public:
std::sort(sums.begin(), sums.end()); std::sort(sums.begin(), sums.end());
max_error = std::abs(sums.front() - sums.back()); max_error = std::abs(sums.front() - sums.back());
// restore unsorted order // restore unsorted order
memcpy(ref_vals.data(), startRefValues, memcpy(ref_vals.data(), startRefValues,
sizeof(HostDataType) * ref_vals.size()); sizeof(HostDataType) * ref_vals.size());
@@ -1590,7 +1801,10 @@ public:
std::string memoryOrderScope = MemoryOrderScopeStr(); std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
return " atomic_fetch_sub" + postfix + "(&destMemory[0], (" return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]" + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1612,7 +1826,10 @@ public:
volatile HostAtomicType *destMemory, volatile HostAtomicType *destMemory,
HostDataType *oldValues) override HostDataType *oldValues) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid], host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder()); MemoryOrder());
@@ -1634,7 +1851,13 @@ public:
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
expected = StartValue(); expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
expected -= startRefValues[i];
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
{ {
@@ -1659,7 +1882,14 @@ public:
const std::vector<HostAtomicType> &testValues, const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
return std::abs((HOST_ATOMIC_FLOAT)expected
- testValues[whichDestValue])
> max_error;
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
return std::abs(cl_half_to_float(expected) return std::abs(cl_half_to_float(expected)
@@ -1674,7 +1904,7 @@ public:
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override HostAtomicType *finalValues) override
{ {
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value) if (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
correct = true; correct = true;
for (cl_uint i = 1; i < threadCount; i++) for (cl_uint i = 1; i < threadCount; i++)
@@ -1697,7 +1927,17 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context, int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override cl_command_queue queue) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
if (LocalMemory() if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1713,7 +1953,10 @@ public:
} }
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
return threadCount; return threadCount;
} }
@@ -1748,6 +1991,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported) if (gFloatAtomicsSupported)
{ {
CBasicTestFetchSub<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_HALF, HOST_HALF> test_half( CBasicTestFetchSub<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM); TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error, EXECUTE_TEST(error,
@@ -2624,7 +2872,10 @@ public:
min_range(-999.0), max_range(999.0) min_range(-999.0), max_range(999.0)
{ {
StartValue(DataType().MaxValue()); StartValue(DataType().MaxValue());
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
CBasicTestMemOrderScope<HostAtomicType, CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false); HostDataType>::OldValueCheck(false);
@@ -2634,7 +2885,10 @@ public:
{ {
std::string memoryOrderScope = MemoryOrderScopeStr(); std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
return " atomic_fetch_min" + postfix return " atomic_fetch_min" + postfix
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
@@ -2653,7 +2907,10 @@ public:
volatile HostAtomicType *destMemory, volatile HostAtomicType *destMemory,
HostDataType *oldValues) override HostDataType *oldValues) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
host_atomic_fetch_min(&destMemory[0], oldValues[tid], host_atomic_fetch_min(&destMemory[0], oldValues[tid],
MemoryOrder()); MemoryOrder());
@@ -2669,7 +2926,19 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override MTdata d) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
for (cl_uint i = 0; i < threadCount; i++)
{
startRefValues[i] = cl_half_from_float(
get_random_float(min_range, max_range, d),
gHalfRoundingMode);
}
}
else if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
{ {
@@ -2696,7 +2965,19 @@ public:
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
expected = StartValue(); expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (whichDestValue == 0)
{
for (cl_uint i = 0; i < threadCount; i++)
{
if (cl_half_to_float(startRefValues[i])
< cl_half_to_float(expected))
expected = startRefValues[i];
}
}
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
@@ -2716,7 +2997,9 @@ public:
const std::vector<HostAtomicType> &testValues, const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value) if (std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>:: return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
@@ -2731,12 +3014,14 @@ public:
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override HostAtomicType *finalValues) override
{ {
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value) if (std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
{ {
correct = true; correct = true;
for (cl_uint i = 1; i < threadCount; i++) for (cl_uint i = 1; i < threadCount; i++)
{ {
for (cl_uint i = 1; i < threadCount; i++) if (refValues[i] != StartValue())
{ {
log_error("Thread %d found %d mismatch(es)\n", i, log_error("Thread %d found %d mismatch(es)\n", i,
(cl_uint)refValues[i]); (cl_uint)refValues[i]);
@@ -2754,7 +3039,31 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context, int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override cl_command_queue queue) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (LocalMemory() if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
@@ -2772,7 +3081,10 @@ public:
} }
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
return threadCount; return threadCount;
} }
@@ -2807,6 +3119,16 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported) if (gFloatAtomicsSupported)
{ {
CBasicTestFetchMin<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float( CBasicTestFetchMin<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM); TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST( EXECUTE_TEST(
@@ -2890,18 +3212,31 @@ public:
useSVM), useSVM),
min_range(-999.0), max_range(999.0) min_range(-999.0), max_range(999.0)
{ {
StartValue(DataType().MinValue()); if constexpr (
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
CBasicTestMemOrderScope<HostAtomicType, CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false); HostDataType>::OldValueCheck(false);
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
StartValue(cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode));
else
StartValue(-DataType().MaxValue());
}
else
{
StartValue(DataType().MinValue());
} }
} }
std::string ProgramCore() override std::string ProgramCore() override
{ {
std::string memoryOrderScope = MemoryOrderScopeStr(); std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
return " atomic_fetch_max" + postfix return " atomic_fetch_max" + postfix
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
@@ -2920,7 +3255,10 @@ public:
volatile HostAtomicType *destMemory, volatile HostAtomicType *destMemory,
HostDataType *oldValues) override HostDataType *oldValues) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
host_atomic_fetch_max(&destMemory[0], oldValues[tid], host_atomic_fetch_max(&destMemory[0], oldValues[tid],
MemoryOrder()); MemoryOrder());
@@ -2936,7 +3274,19 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override MTdata d) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
for (cl_uint i = 0; i < threadCount; i++)
{
startRefValues[i] = cl_half_from_float(
get_random_float(min_range, max_range, d),
gHalfRoundingMode);
}
}
else if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
{ {
@@ -2963,7 +3313,19 @@ public:
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
expected = StartValue(); expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (whichDestValue == 0)
{
for (cl_uint i = 0; i < threadCount; i++)
{
if (cl_half_to_float(startRefValues[i])
> cl_half_to_float(expected))
expected = startRefValues[i];
}
}
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
@@ -2983,7 +3345,9 @@ public:
const std::vector<HostAtomicType> &testValues, const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value) if (std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>:: return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
@@ -2998,7 +3362,9 @@ public:
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override HostAtomicType *finalValues) override
{ {
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value) if (std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
correct = true; correct = true;
for (cl_uint i = 1; i < threadCount; i++) for (cl_uint i = 1; i < threadCount; i++)
@@ -3021,7 +3387,31 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context, int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override cl_command_queue queue) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (LocalMemory() if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
@@ -3039,7 +3429,10 @@ public:
} }
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
return threadCount; return threadCount;
} }
@@ -3074,6 +3467,16 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported) if (gFloatAtomicsSupported)
{ {
CBasicTestFetchMax<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float( CBasicTestFetchMax<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM); TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST( EXECUTE_TEST(
@@ -3898,6 +4301,9 @@ private:
struct TestDefinition _subCase; struct TestDefinition _subCase;
}; };
#if 0
// The tests below are likely incorrect and have been disabled.
// See https://github.com/KhronosGroup/OpenCL-CTS/issues/2544
static int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, static int test_atomic_fence_generic(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements, cl_command_queue queue, int num_elements,
bool useSVM) bool useSVM)
@@ -3977,3 +4383,4 @@ REGISTER_TEST(svm_atomic_fence)
return test_atomic_fence_generic(device, context, queue, num_elements, return test_atomic_fence_generic(device, context, queue, num_elements,
true); true);
} }
#endif

View File

@@ -0,0 +1,13 @@
set(DIRECTX_WRAPPER_SOURCES
directx_wrapper.cpp
)
add_library(directx_wrapper STATIC ${DIRECTX_WRAPPER_SOURCES})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CLConform_INCLUDE_DIR})
if (WIN32)
target_link_libraries(directx_wrapper d3d12)
endif ()

View File

@@ -0,0 +1,71 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "directx_wrapper.hpp"
DirectXWrapper::DirectXWrapper()
{
HRESULT hr = D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL_12_0,
IID_PPV_ARGS(&dx_device));
if (FAILED(hr))
{
throw std::runtime_error("Failed to create DirectX 12 device");
}
D3D12_COMMAND_QUEUE_DESC desc{};
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
hr = dx_device->CreateCommandQueue(&desc, IID_PPV_ARGS(&dx_command_queue));
if (FAILED(hr))
{
throw std::runtime_error("Failed to create DirectX 12 command queue");
}
hr = dx_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT,
IID_PPV_ARGS(&dx_command_allocator));
if (FAILED(hr))
{
throw std::runtime_error(
"Failed to create DirectX 12 command allocator");
}
}
ID3D12Device* DirectXWrapper::getDXDevice() const { return dx_device.Get(); }
ID3D12CommandQueue* DirectXWrapper::getDXCommandQueue() const
{
return dx_command_queue.Get();
}
ID3D12CommandAllocator* DirectXWrapper::getDXCommandAllocator() const
{
return dx_command_allocator.Get();
}
DirectXFenceWrapper::DirectXFenceWrapper(ID3D12Device* dx_device)
: dx_device(dx_device)
{
if (!dx_device)
{
throw std::runtime_error("ID3D12Device is not valid");
}
const HRESULT hr = dx_device->CreateFence(0, D3D12_FENCE_FLAG_SHARED,
IID_PPV_ARGS(&dx_fence));
if (FAILED(hr))
{
throw std::runtime_error("Failed to create the DirectX fence");
}
}

View File

@@ -0,0 +1,47 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#pragma once
#include <d3d12.h>
#include <wrl/client.h>
#include <stdexcept>
using namespace Microsoft::WRL;
class DirectXWrapper {
public:
DirectXWrapper();
ID3D12Device* getDXDevice() const;
ID3D12CommandQueue* getDXCommandQueue() const;
ID3D12CommandAllocator* getDXCommandAllocator() const;
protected:
ComPtr<ID3D12Device> dx_device = nullptr;
ComPtr<ID3D12CommandQueue> dx_command_queue = nullptr;
ComPtr<ID3D12CommandAllocator> dx_command_allocator = nullptr;
};
class DirectXFenceWrapper {
public:
DirectXFenceWrapper(ID3D12Device* dx_device);
ID3D12Fence* operator*() const { return dx_fence.Get(); }
private:
ComPtr<ID3D12Fence> dx_fence = nullptr;
ComPtr<ID3D12Device> dx_device = nullptr;
};

View File

@@ -57,7 +57,7 @@ namespace {
template <typename T> template <typename T>
int verify_degrees(const T *const inptr, const T *const outptr, int n) int verify_degrees(const T *const inptr, const T *const outptr, int n)
{ {
float error, max_error = 0.0f; float error, max_error = -INFINITY;
double r, max_val = NAN; double r, max_val = NAN;
int max_index = 0; int max_index = 0;
@@ -89,13 +89,16 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n)
} }
if (std::is_same<T, half>::value) if (std::is_same<T, half>::value)
log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", log_info("degrees: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
max_error, max_index, max_val, conv_to_flt(outptr[max_index]), "vs %g)\n",
max_val, conv_to_flt(outptr[max_index])); max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
conv_to_flt(outptr[max_index]), max_val,
conv_to_flt(outptr[max_index]));
else else
log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", log_info("degrees: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
max_error, max_index, max_val, outptr[max_index], max_val, "vs %g)\n",
outptr[max_index]); max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
outptr[max_index], max_val, outptr[max_index]);
return 0; return 0;
} }
@@ -103,7 +106,7 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n)
template <typename T> template <typename T>
int verify_radians(const T *const inptr, const T *const outptr, int n) int verify_radians(const T *const inptr, const T *const outptr, int n)
{ {
float error, max_error = 0.0f; float error, max_error = -INFINITY;
double r, max_val = NAN; double r, max_val = NAN;
int max_index = 0; int max_index = 0;
@@ -135,13 +138,16 @@ int verify_radians(const T *const inptr, const T *const outptr, int n)
} }
if (std::is_same<T, half>::value) if (std::is_same<T, half>::value)
log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", log_info("radians: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
max_error, max_index, max_val, conv_to_flt(outptr[max_index]), "vs %g)\n",
max_val, conv_to_flt(outptr[max_index])); max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
conv_to_flt(outptr[max_index]), max_val,
conv_to_flt(outptr[max_index]));
else else
log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", log_info("radians: Max error %f ulps at %d, input %a: *%a vs %a (*%g "
max_error, max_index, max_val, outptr[max_index], max_val, "vs %g)\n",
outptr[max_index]); max_error, max_index, conv_to_flt(inptr[max_index]), max_val,
outptr[max_index], max_val, outptr[max_index]);
return 0; return 0;
} }

View File

@@ -1,5 +1,7 @@
set(MODULE_NAME COMPILER) set(MODULE_NAME COMPILER)
find_package(Python3 COMPONENTS Interpreter QUIET)
set(${MODULE_NAME}_SOURCES set(${MODULE_NAME}_SOURCES
main.cpp main.cpp
test_build_helpers.cpp test_build_helpers.cpp
@@ -52,7 +54,7 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_directory COMMAND ${CMAKE_COMMAND} -E copy_directory
${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory ${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory ${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory
COMMAND ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose COMMAND ${Python3_EXECUTABLE} ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose
DEPENDS ${COMPILER_ASSEMBLY_SCRIPT} ${COMPILER_ASM} DEPENDS ${COMPILER_ASSEMBLY_SCRIPT} ${COMPILER_ASM}
VERBATIM) VERBATIM)

View File

@@ -13,10 +13,62 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
#include <string>
#include <filesystem>
#include "harness/testHarness.h" #include "harness/testHarness.h"
#include "harness/stringHelpers.h"
std::string spvBinariesPath = "spirv_bin";
const std::string spvBinariesPathArg = "--spirv-binaries-path";
void printUsage()
{
log_info("Reading SPIR-V files from default '%s' path.\n",
spvBinariesPath.c_str());
log_info("In case you want to set other directory use '%s' argument.\n",
spvBinariesPathArg.c_str());
}
int main(int argc, const char *argv[]) int main(int argc, const char *argv[])
{ {
bool modifiedSpvBinariesPath = false;
bool listTests = false;
for (int i = 0; i < argc; ++i)
{
int argsRemoveNum = 0;
if (argv[i] == spvBinariesPathArg)
{
if (i + 1 == argc)
{
log_error("Missing value for '%s' argument.\n",
spvBinariesPathArg.c_str());
return TEST_FAIL;
}
else
{
spvBinariesPath = std::string(argv[i + 1]);
argsRemoveNum += 2;
modifiedSpvBinariesPath = true;
}
}
if (argsRemoveNum > 0)
{
for (int j = i; j < (argc - argsRemoveNum); ++j)
argv[j] = argv[j + argsRemoveNum];
argc -= argsRemoveNum;
--i;
}
listTests |= (argv[i] == std::string("--list")
|| argv[i] == std::string("-list"));
}
if (modifiedSpvBinariesPath == false && !listTests)
{
printUsage();
}
return runTestHarness(argc, argv, test_registry::getInstance().num_tests(), return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
test_registry::getInstance().definitions(), false, 0); test_registry::getInstance().definitions(), false, 0);
} }

View File

@@ -24,6 +24,8 @@
#include <unistd.h> #include <unistd.h>
#endif #endif
#include "harness/conversions.h" #include "harness/conversions.h"
#include "harness/stringHelpers.h"
#include "harness/parseParameters.h"
#define MAX_LINE_SIZE_IN_PROGRAM 1024 #define MAX_LINE_SIZE_IN_PROGRAM 1024
#define MAX_LOG_SIZE_IN_PROGRAM 2048 #define MAX_LOG_SIZE_IN_PROGRAM 2048
@@ -158,6 +160,14 @@ const char *link_static_function_access = // use with compile_static_function
"extern int foo(int, int);\n" "extern int foo(int, int);\n"
"int access_foo() { int blah = foo(3, 4); return blah + 5; }\n"; "int access_foo() { int blah = foo(3, 4); return blah + 5; }\n";
const char *multi_build_test_kernel = R"(
__kernel void test_kernel(__global int *dst)
{
int tid = get_global_id(0);
dst[tid] = BUILD_OPT_VAL;
}
)";
static int test_large_single_compile(cl_context context, cl_device_id deviceID, static int test_large_single_compile(cl_context context, cl_device_id deviceID,
unsigned int numLines) unsigned int numLines)
{ {
@@ -3059,12 +3069,13 @@ REGISTER_TEST(execute_after_included_header_link)
} }
const auto simple_header_path = temp_dir_path / simple_header_name; const auto simple_header_path = temp_dir_path / simple_header_name;
FILE *simple_header_file = const std::string simple_header_path_str =
fopen(simple_header_path.u8string().c_str(), "w"); to_string(simple_header_path.u8string());
FILE *simple_header_file = fopen(simple_header_path_str.c_str(), "w");
if (simple_header_file == NULL) if (simple_header_file == NULL)
{ {
log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n", log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n",
simple_header_path.u8string().c_str(), __FILE__, __LINE__); simple_header_path_str.c_str(), __FILE__, __LINE__);
return -1; return -1;
} }
if (fprintf(simple_header_file, "%s", simple_header) < 0) if (fprintf(simple_header_file, "%s", simple_header) < 0)
@@ -3082,7 +3093,7 @@ REGISTER_TEST(execute_after_included_header_link)
} }
const std::string include_path = const std::string include_path =
std::string("-I") + temp_dir_path.generic_u8string(); std::string("-I") + to_string(temp_dir_path.generic_u8string());
error = clCompileProgram(program, 1, &device, include_path.c_str(), 0, NULL, error = clCompileProgram(program, 1, &device, include_path.c_str(), 0, NULL,
NULL, NULL, NULL); NULL, NULL, NULL);
test_error(error, test_error(error,
@@ -3931,3 +3942,100 @@ REGISTER_TEST(compile_and_link_status_options_log)
return 0; return 0;
} }
REGISTER_TEST(multiple_build_program)
{
if (gCompilationMode != kOnline)
{
log_info(
"Skipping multiple_build_program, compilation mode not online\n");
return TEST_SKIPPED_ITSELF;
}
cl_int error = CL_SUCCESS;
const size_t num_threads = num_elements;
clProgramWrapper program = clCreateProgramWithSource(
context, 1, &multi_build_test_kernel, nullptr, &error);
test_error(error, "clCreateProgramWithSource failed");
clMemWrapper out_stream_0 = clCreateBuffer(
context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_threads, NULL, &error);
test_error(error, "clCreateBuffer failed");
clMemWrapper out_stream_1 = clCreateBuffer(
context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_threads, NULL, &error);
test_error(error, "clCreateBuffer failed");
{
/* Build with the macro defined */
error = clBuildProgram(program, 1, &device, "-DBUILD_OPT_VAL=1 ", NULL,
NULL);
test_error(error, "clBuildProgram failed");
clKernelWrapper kernel0 =
clCreateKernel(program, "test_kernel", &error);
test_error(error, "clCreateKernel failed");
error = clSetKernelArg(kernel0, 0, sizeof(out_stream_0), &out_stream_0);
test_error(error, "clSetKernelArg failed");
error = clEnqueueNDRangeKernel(queue, kernel0, 1, NULL, &num_threads,
NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
}
{
/* Rebuild with the macro redefined */
error = clBuildProgram(program, 1, &device, "-DBUILD_OPT_VAL=2 ", NULL,
NULL);
test_error(error, "clBuildProgram failed");
clKernelWrapper kernel1 =
clCreateKernel(program, "test_kernel", &error);
test_error(error, "clCreateKernel failed");
error = clSetKernelArg(kernel1, 0, sizeof(out_stream_1), &out_stream_1);
test_error(error, "clSetKernelArg failed");
error = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &num_threads,
NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
}
error = clFinish(queue);
test_error(error, "clFinish failed");
std::vector<cl_int> test_values(num_threads, 0);
error = clEnqueueReadBuffer(queue, out_stream_0, true, 0,
sizeof(cl_int) * num_threads,
test_values.data(), 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < test_values.size(); i++)
{
if (test_values[i] != 1)
{
log_error("Unexpected test value %d for kernel0 at pos %zu.\n",
test_values[i], i);
return TEST_FAIL;
}
}
error = clEnqueueReadBuffer(queue, out_stream_1, true, 0,
sizeof(cl_int) * num_threads,
test_values.data(), 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < test_values.size(); i++)
{
if (test_values[i] != 2)
{
log_error("Unexpected test value %d for kernel1 at pos %zu.\n",
test_values[i], i);
return TEST_FAIL;
}
}
return TEST_PASS;
}

View File

@@ -94,6 +94,7 @@ const char *known_extensions[] = {
"cl_khr_external_memory_dma_buf", "cl_khr_external_memory_dma_buf",
"cl_khr_command_buffer", "cl_khr_command_buffer",
"cl_khr_command_buffer_mutable_dispatch", "cl_khr_command_buffer_mutable_dispatch",
"cl_khr_command_buffer_mutable_memory_commands",
"cl_khr_command_buffer_multi_device", "cl_khr_command_buffer_multi_device",
"cl_khr_external_memory_android_hardware_buffer", "cl_khr_external_memory_android_hardware_buffer",
"cl_khr_unified_svm", "cl_khr_unified_svm",

View File

@@ -32,8 +32,6 @@ const std::string slash = "\\";
#else #else
const std::string slash = "/"; const std::string slash = "/";
#endif #endif
std::string compilerSpvBinaries = "test_conformance" + slash + "compiler"
+ slash + "spirv_bin" + slash + "write_kernel.spv";
const std::string spvExt = ".spv"; const std::string spvExt = ".spv";
@@ -338,8 +336,8 @@ public:
std::vector<unsigned char> kernel_buffer; std::vector<unsigned char> kernel_buffer;
std::string file_name = std::string file_name = spvBinariesPath + slash + "write_kernel.spv"
compilerSpvBinaries + std::to_string(address_bits); + std::to_string(address_bits);
m_spirv_binary = readBinary(file_name.c_str()); m_spirv_binary = readBinary(file_name.c_str());
m_spirv_size = m_spirv_binary.size(); m_spirv_size = m_spirv_binary.size();
} }

View File

@@ -1,4 +1,6 @@
#include <array> #include <string>
extern std::string spvBinariesPath;
static const char write_kernel_source[] = R"( static const char write_kernel_source[] = R"(
kernel void write_kernel(global unsigned int *p) { kernel void write_kernel(global unsigned int *p) {

View File

@@ -15,3 +15,6 @@ add_subdirectory( cl_ext_buffer_device_address )
if(VULKAN_IS_SUPPORTED) if(VULKAN_IS_SUPPORTED)
add_subdirectory( cl_khr_external_semaphore ) add_subdirectory( cl_khr_external_semaphore )
endif() endif()
if(D3D12_IS_SUPPORTED)
add_subdirectory( cl_khr_external_semaphore_dx_fence )
endif()

View File

@@ -17,6 +17,7 @@ set(${MODULE_NAME}_SOURCES
command_buffer_test_barrier.cpp command_buffer_test_barrier.cpp
command_buffer_test_event_info.cpp command_buffer_test_event_info.cpp
command_buffer_finalize.cpp command_buffer_finalize.cpp
command_buffer_pipelined_enqueue.cpp
negative_command_buffer_finalize.cpp negative_command_buffer_finalize.cpp
negative_command_buffer_svm_mem.cpp negative_command_buffer_svm_mem.cpp
negative_command_buffer_copy_image.cpp negative_command_buffer_copy_image.cpp

View File

@@ -27,9 +27,6 @@ BasicCommandBufferTest::BasicCommandBufferTest(cl_device_id device,
: CommandBufferTestBase(device), context(context), queue(nullptr), : CommandBufferTestBase(device), context(context), queue(nullptr),
num_elements(0), simultaneous_use_support(false), num_elements(0), simultaneous_use_support(false),
out_of_order_support(false), queue_out_of_order_support(false), out_of_order_support(false), queue_out_of_order_support(false),
// try to use simultaneous path by default
simultaneous_use_requested(true),
// due to simultaneous cases extend buffer size
buffer_size_multiplier(1), command_buffer(this) buffer_size_multiplier(1), command_buffer(this)
{ {
cl_int error = clRetainCommandQueue(queue); cl_int error = clRetainCommandQueue(queue);
@@ -72,9 +69,8 @@ bool BasicCommandBufferTest::Skip()
sizeof(capabilities), &capabilities, NULL); sizeof(capabilities), &capabilities, NULL);
test_error(error, test_error(error,
"Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR"); "Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
simultaneous_use_support = simultaneous_use_requested simultaneous_use_support =
&& (capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR) (capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR) != 0;
!= 0;
out_of_order_support = out_of_order_support =
supported_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; supported_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
device_side_enqueue_support = device_side_enqueue_support =
@@ -167,19 +163,7 @@ cl_int BasicCommandBufferTest::SetUp(int elements)
error = SetUpKernelArgs(); error = SetUpKernelArgs();
test_error(error, "SetUpKernelArgs failed"); test_error(error, "SetUpKernelArgs failed");
if (simultaneous_use_support) command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
{
cl_command_buffer_properties_khr properties[3] = {
CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR,
0
};
command_buffer =
clCreateCommandBufferKHR(1, &queue, properties, &error);
}
else
{
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
}
test_error(error, "clCreateCommandBufferKHR failed"); test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS; return CL_SUCCESS;
@@ -192,11 +176,6 @@ cl_int MultiFlagCreationTest::Run()
// First try to find multiple flags that are supported by the driver and // First try to find multiple flags that are supported by the driver and
// device. // device.
if (simultaneous_use_support)
{
flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
if (is_extension_available( if (is_extension_available(
device, CL_KHR_COMMAND_BUFFER_MULTI_DEVICE_EXTENSION_NAME)) device, CL_KHR_COMMAND_BUFFER_MULTI_DEVICE_EXTENSION_NAME))
{ {
@@ -207,6 +186,11 @@ cl_int MultiFlagCreationTest::Run()
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME)) device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
{ {
flags |= CL_COMMAND_BUFFER_MUTABLE_KHR; flags |= CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_support)
{
flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
} }
cl_command_buffer_properties_khr props[] = { CL_COMMAND_BUFFER_FLAGS_KHR, cl_command_buffer_properties_khr props[] = { CL_COMMAND_BUFFER_FLAGS_KHR,
@@ -381,11 +365,6 @@ cl_int ExplicitFlushTest::Run()
return CL_SUCCESS; return CL_SUCCESS;
} }
bool ExplicitFlushTest::Skip()
{
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
}
cl_int InterleavedEnqueueTest::Run() cl_int InterleavedEnqueueTest::Run()
{ {
cl_int error = clCommandNDRangeKernelKHR( cl_int error = clCommandNDRangeKernelKHR(
@@ -431,11 +410,6 @@ cl_int InterleavedEnqueueTest::Run()
return CL_SUCCESS; return CL_SUCCESS;
} }
bool InterleavedEnqueueTest::Skip()
{
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
}
cl_int EnqueueAndReleaseTest::Run() cl_int EnqueueAndReleaseTest::Run()
{ {
cl_int error = clCommandNDRangeKernelKHR( cl_int error = clCommandNDRangeKernelKHR(

View File

@@ -78,8 +78,11 @@ protected:
bool queue_out_of_order_support; bool queue_out_of_order_support;
bool device_side_enqueue_support; bool device_side_enqueue_support;
// user request for simultaneous use // Extends size of created 'in_mem' & 'out_mem' buffers, such that the same
bool simultaneous_use_requested; // cl_mem buffer can be used across multiple enqueues of a command-buffer.
// Accessed in the kernel at an offset for each enqueue which is passed as
// a kernel parameter through the 'off_mem' buffer.
// See BasicCommandBufferTest::SetUpKernel() definition.
unsigned buffer_size_multiplier; unsigned buffer_size_multiplier;
clCommandBufferWrapper command_buffer; clCommandBufferWrapper command_buffer;
}; };
@@ -116,7 +119,6 @@ struct ExplicitFlushTest : public BasicCommandBufferTest
using BasicCommandBufferTest::BasicCommandBufferTest; using BasicCommandBufferTest::BasicCommandBufferTest;
cl_int Run() override; cl_int Run() override;
bool Skip() override;
}; };
// Test enqueueing a command-buffer twice separated by another enqueue operation // Test enqueueing a command-buffer twice separated by another enqueue operation
@@ -125,7 +127,6 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest
using BasicCommandBufferTest::BasicCommandBufferTest; using BasicCommandBufferTest::BasicCommandBufferTest;
cl_int Run() override; cl_int Run() override;
bool Skip() override;
}; };
// Test releasing a command-buffer after it has been submitted for execution, // Test releasing a command-buffer after it has been submitted for execution,
@@ -156,9 +157,9 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
cl_version extension_version = cl_version extension_version =
get_extension_version(device, "cl_khr_command_buffer"); get_extension_version(device, "cl_khr_command_buffer");
if (extension_version != CL_MAKE_VERSION(0, 9, 7)) if (extension_version != CL_MAKE_VERSION(0, 9, 8))
{ {
log_info("cl_khr_command_buffer version 0.9.7 is required to run " log_info("cl_khr_command_buffer version 0.9.8 is required to run "
"the test, skipping.\n "); "the test, skipping.\n ");
return TEST_SKIPPED_ITSELF; return TEST_SKIPPED_ITSELF;
} }

View File

@@ -50,13 +50,14 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
virtual cl_int SetUp(int elements) override virtual cl_int SetUp(int elements) override
{ {
BasicCommandBufferTest::SetUp(elements); cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp failed");
cl_int error = init_extension_functions(); error = init_extension_functions();
test_error(error, "Unable to initialise extension functions"); test_error(error, "Unable to initialise extension functions");
cl_command_buffer_properties_khr prop = CL_COMMAND_BUFFER_MUTABLE_KHR; cl_command_buffer_properties_khr prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_support) if (simultaneous_use_requested)
{ {
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR; prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
} }
@@ -90,10 +91,10 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
cl_version extension_version = get_extension_version( cl_version extension_version = get_extension_version(
device, "cl_khr_command_buffer_mutable_dispatch"); device, "cl_khr_command_buffer_mutable_dispatch");
if (extension_version != CL_MAKE_VERSION(0, 9, 3)) if (extension_version != CL_MAKE_VERSION(0, 9, 4))
{ {
log_info("cl_khr_command_buffer_mutable_dispatch version " log_info("cl_khr_command_buffer_mutable_dispatch version "
"0.9.3 is " "0.9.4 is "
"required to run the test, skipping.\n "); "required to run the test, skipping.\n ");
extension_avaliable = false; extension_avaliable = false;
} }
@@ -128,6 +129,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
} }
clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
bool simultaneous_use_requested = false;
const char* kernelString = "__kernel void empty() {}"; const char* kernelString = "__kernel void empty() {}";
const size_t global_work_size = 4 * 16; const size_t global_work_size = 4 * 16;

View File

@@ -70,9 +70,9 @@ struct MutableDispatchImage1DArguments : public BasicMutableCommandBufferTest
{ {
int offset = get_global_id(0); int offset = get_global_id(0);
int4 color = read_imagei( source, sampler, offset ); uint4 color = read_imageui( source, sampler, offset );
write_imagei( dest, offset, color ); write_imageui( dest, offset, color );
})"; })";
cl_int error; cl_int error;
@@ -260,9 +260,9 @@ struct MutableDispatchImage2DArguments : public BasicMutableCommandBufferTest
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
int4 color = read_imagei( source, sampler, (int2) (x, y) ); uint4 color = read_imageui( source, sampler, (int2) (x, y) );
write_imagei( dest, (int2) (x, y), color ); write_imageui( dest, (int2) (x, y), color );
})"; })";
cl_int error; cl_int error;

View File

@@ -35,9 +35,7 @@ struct IterativeArgUpdateDispatch : BasicMutableCommandBufferTest
cl_command_queue queue) cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue), : BasicMutableCommandBufferTest(device, context, queue),
command(nullptr) command(nullptr)
{ {}
simultaneous_use_requested = false;
}
bool Skip() override bool Skip() override
{ {

View File

@@ -33,9 +33,7 @@ struct MultipleCommandsDispatch : BasicMutableCommandBufferTest
cl_command_queue queue) cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue), : BasicMutableCommandBufferTest(device, context, queue),
command_pri(nullptr), command_sec(nullptr) command_pri(nullptr), command_sec(nullptr)
{ {}
simultaneous_use_requested = false;
}
bool Skip() override bool Skip() override
{ {
@@ -47,7 +45,7 @@ struct MultipleCommandsDispatch : BasicMutableCommandBufferTest
sizeof(mutable_capabilities), &mutable_capabilities, nullptr) sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
// require mutable arguments capabillity // require mutable arguments capability
return !mutable_support; return !mutable_support;
} }

View File

@@ -34,9 +34,7 @@ struct OverwriteUpdateDispatch : BasicMutableCommandBufferTest
cl_command_queue queue) cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue), : BasicMutableCommandBufferTest(device, context, queue),
command(nullptr) command(nullptr)
{ {}
simultaneous_use_requested = false;
}
bool Skip() override bool Skip() override
{ {

View File

@@ -21,10 +21,12 @@
#include <CL/cl.h> #include <CL/cl.h>
#include <CL/cl_ext.h> #include <CL/cl_ext.h>
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// mutable dispatch tests which handle following cases: // mutable dispatch tests which handles
// - out-of-order queue use // - out-of-order queue with dependencies between command-buffer enqueues
// - out-of-order queue with simultaneous use // - out-of-order queue with simultaneous use
// - in-order queue with dependencies between command-buffer enqueues
// - in-order queue with simultaneous use // - in-order queue with simultaneous use
// - cross queue with dependencies between command-buffer enqueues
// - cross-queue with simultaneous use // - cross-queue with simultaneous use
namespace { namespace {
@@ -35,11 +37,10 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
SimultaneousMutableDispatchTest(cl_device_id device, cl_context context, SimultaneousMutableDispatchTest(cl_device_id device, cl_context context,
cl_command_queue queue) cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue), : BasicMutableCommandBufferTest(device, context, queue),
work_queue(nullptr), work_command_buffer(this), user_event(nullptr), work_queue(nullptr), work_command_buffer(this), new_in_mem(nullptr),
wait_pass_event(nullptr), command(nullptr) command(nullptr)
{ {
simultaneous_use_requested = simultaneous_request; simultaneous_use_requested = simultaneous_request;
if (simultaneous_request) buffer_size_multiplier = 2;
} }
cl_int SetUpKernel() override cl_int SetUpKernel() override
@@ -48,26 +49,36 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
test_error(error, "BasicCommandBufferTest::SetUpKernel failed"); test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
// create additional kernel to properly prepare output buffer for test // create additional kernel to properly prepare output buffer for test
const char* kernel_str = const char *kernel_str =
R"( R"(
__kernel void fill(int pattern, __global int* out, __global int* __kernel void mul(__global int* out, __global int* in, int mul_val)
offset)
{ {
size_t id = get_global_id(0); size_t id = get_global_id(0);
size_t ind = offset[0] + id ; out[id] = in[id] * mul_val;
out[ind] = pattern;
})"; })";
error = create_single_kernel_helper_create_program( error = create_single_kernel_helper_create_program(
context, &program_fill, 1, &kernel_str); context, &program_mul, 1, &kernel_str);
test_error(error, "Failed to create program with source"); test_error(error, "Failed to create program with source");
error = error =
clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr); clBuildProgram(program_mul, 1, &device, nullptr, nullptr, nullptr);
test_error(error, "Failed to build program"); test_error(error, "Failed to build program");
kernel_fill = clCreateKernel(program_fill, "fill", &error); kernel_mul = clCreateKernel(program_mul, "mul", &error);
test_error(error, "Failed to create copy kernel"); test_error(error, "Failed to create multiply kernel");
new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
new_in_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(cl_int) * num_elements
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
return CL_SUCCESS; return CL_SUCCESS;
} }
@@ -77,14 +88,13 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
cl_int error = BasicCommandBufferTest::SetUpKernelArgs(); cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed"); test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed");
error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int), error = clSetKernelArg(kernel_mul, 0, sizeof(out_mem), &out_mem);
&overwritten_pattern);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem); error = clSetKernelArg(kernel_mul, 1, sizeof(off_mem), &in_mem);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem); error = clSetKernelArg(kernel_mul, 2, sizeof(cl_int), &pattern_pri);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
return CL_SUCCESS; return CL_SUCCESS;
@@ -101,30 +111,28 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
&error); &error);
test_error(error, "Unable to create command queue to test with"); test_error(error, "Unable to create command queue to test with");
cl_command_buffer_properties_khr prop =
CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_support)
{
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
const cl_command_buffer_properties_khr props[] = {
CL_COMMAND_BUFFER_FLAGS_KHR,
prop,
0,
};
work_command_buffer =
clCreateCommandBufferKHR(1, &work_queue, props, &error);
test_error(error, "clCreateCommandBufferKHR failed");
} }
else else
{ {
work_queue = queue; work_queue = queue;
work_command_buffer = command_buffer;
} }
cl_command_buffer_properties_khr prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
if (simultaneous_use_requested)
{
prop |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
}
const cl_command_buffer_properties_khr props[] = {
CL_COMMAND_BUFFER_FLAGS_KHR,
prop,
0,
};
work_command_buffer =
clCreateCommandBufferKHR(1, &work_queue, props, &error);
test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS; return CL_SUCCESS;
} }
@@ -145,293 +153,245 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|| !mutable_support; || !mutable_support;
} }
cl_int RecordCommandBuffer()
{
cl_int error = clCommandNDRangeKernelKHR(
work_command_buffer, nullptr, nullptr, kernel_mul, 1, nullptr,
&num_elements, nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(work_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
cl_int RunSerializedPass(std::vector<cl_int> &first_enqueue_output,
std::vector<cl_int> &second_enqueue_output)
{
/* Serialize command-buffer enqueue, is a linear sequence of
* commands, with dependencies enforced using an in-order queue
* or cl_event dependencies.
*
* 1. Fill input buffer
* 2. Enqueue command-buffer doing: `output = a * input;
* 3. Read output buffer to host data so it can be verified later
* - Update command to new input buffer, new `a` val and use output
* buffer from previous invocation as new input buffer.
* 4. Enqueue command-buffer again.
* 5. Read new output buffer back to host data so it can be verified
* later
*
*/
clEventWrapper E[4];
cl_int error = clEnqueueFillBuffer(
work_queue, in_mem, &pattern_fill, sizeof(cl_int), 0, data_size(),
0, nullptr, (out_of_order_request ? &E[0] : nullptr));
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[0] : nullptr),
(out_of_order_request ? &E[1] : nullptr));
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(work_queue, out_mem, CL_FALSE, 0,
data_size(), first_enqueue_output.data(),
(out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[1] : nullptr),
(out_of_order_request ? &E[2] : nullptr));
test_error(error, "clEnqueueReadBuffer failed");
cl_mutable_dispatch_arg_khr arg_1{ 0, sizeof(new_out_mem),
&new_out_mem };
cl_mutable_dispatch_arg_khr arg_2{ 1, sizeof(cl_mem), &out_mem };
cl_mutable_dispatch_arg_khr arg_3{ 2, sizeof(cl_int), &pattern_sec };
cl_mutable_dispatch_arg_khr args[] = { arg_1, arg_2, arg_3 };
cl_mutable_dispatch_config_khr dispatch_config{
command,
3 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
args /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void* configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[2] : nullptr),
(out_of_order_request ? &E[3] : nullptr));
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
work_queue, new_out_mem, CL_FALSE, 0, data_size(),
second_enqueue_output.data(), (out_of_order_request ? 1 : 0),
(out_of_order_request ? &E[3] : nullptr), nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
// Enqueues the same command-buffer twice with no dependency between the
// two submissions (simultaneous use), updating the kernel arguments in
// between so each submission works on disjoint buffers. Both results are
// read back with non-blocking reads into the caller-provided vectors;
// the caller is expected to finish the queue before inspecting them.
cl_int RunSimultaneousPass(std::vector<cl_int> &first_enqueue_output,
                           std::vector<cl_int> &second_enqueue_output)
{
    /* Simultaneous command-buffer pass enqueues a command-buffer twice
     * without dependencies between the enqueues, but an update so that
     * all the parameters are different to avoid race conditions in the
     * kernel execution. The asynchronous task graph looks like:
     *
     *  (Fill input A buffer)        (Fill input B buffer)
     *          |                            |
     *  (Enqueue command_buffer)    (Enqueue updated command_buffer)
     *          |                            |
     *  (Read output A buffer)      (Read output B buffer)
     */
    // E[0]/E[1]: input-fill completion, E[2]/E[3]: command-buffer
    // completion. Only wired up when testing an out-of-order queue;
    // on an in-order queue the queue ordering provides the dependency.
    clEventWrapper E[4];
    cl_int error = clEnqueueFillBuffer(
        work_queue, in_mem, &pattern_fill, sizeof(cl_int), 0, data_size(),
        0, nullptr, (out_of_order_request ? &E[0] : nullptr));
    test_error(error, "clEnqueueFillBuffer failed");
    error = clEnqueueFillBuffer(work_queue, new_in_mem, &pattern_fill_2,
                                sizeof(cl_int), 0, data_size(), 0, nullptr,
                                (out_of_order_request ? &E[1] : nullptr));
    test_error(error, "clEnqueueFillBuffer failed");
    // First submission runs with the arguments as recorded
    // (in_mem -> out_mem).
    error = clEnqueueCommandBufferKHR(
        0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
        (out_of_order_request ? &E[0] : nullptr),
        (out_of_order_request ? &E[2] : nullptr));
    test_error(error, "clEnqueueCommandBufferKHR failed");
    // Replace all three kernel arguments for the second submission:
    // arg 0 <- new_out_mem, arg 1 <- new_in_mem, arg 2 <- pattern_sec.
    // (Kernel source is outside this view; presumably arg 0 is the
    // output buffer and arg 2 the multiplier - confirm against kernel.)
    cl_mutable_dispatch_arg_khr arg_1{ 0, sizeof(new_out_mem),
                                       &new_out_mem };
    cl_mutable_dispatch_arg_khr arg_2{ 1, sizeof(cl_mem), &new_in_mem };
    cl_mutable_dispatch_arg_khr arg_3{ 2, sizeof(cl_int), &pattern_sec };
    cl_mutable_dispatch_arg_khr args[] = { arg_1, arg_2, arg_3 };
    cl_mutable_dispatch_config_khr dispatch_config{
        command,
        3 /* num_args */,
        0 /* num_svm_arg */,
        0 /* num_exec_infos */,
        0 /* work_dim - 0 means no change to dimensions */,
        args /* arg_list */,
        nullptr /* arg_svm_list - nullptr means no change*/,
        nullptr /* exec_info_list */,
        nullptr /* global_work_offset */,
        nullptr /* global_work_size */,
        nullptr /* local_work_size */
    };
    cl_uint num_configs = 1;
    cl_command_buffer_update_type_khr config_types[1] = {
        CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
    };
    const void* configs[1] = { &dispatch_config };
    error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
                                       config_types, configs);
    test_error(error, "clUpdateMutableCommandsKHR failed");
    // Second submission of the same command-buffer with the updated
    // arguments; intentionally not dependent on the first submission.
    error = clEnqueueCommandBufferKHR(
        0, nullptr, work_command_buffer, (out_of_order_request ? 1 : 0),
        (out_of_order_request ? &E[1] : nullptr),
        (out_of_order_request ? &E[3] : nullptr));
    test_error(error, "clEnqueueCommandBufferKHR failed");
    // Non-blocking read-backs of both result buffers.
    error = clEnqueueReadBuffer(
        work_queue, out_mem, CL_FALSE, 0, data_size(),
        first_enqueue_output.data(), (out_of_order_request ? 1 : 0),
        (out_of_order_request ? &E[2] : nullptr), nullptr);
    test_error(error, "clEnqueueReadBuffer failed");
    error = clEnqueueReadBuffer(
        work_queue, new_out_mem, CL_FALSE, 0, data_size(),
        second_enqueue_output.data(), (out_of_order_request ? 1 : 0),
        (out_of_order_request ? &E[3] : nullptr), nullptr);
    test_error(error, "clEnqueueReadBuffer failed");
    return CL_SUCCESS;
}
// Checks the results of the serialized (non-simultaneous) test mode.
// The first submission multiplies the fill pattern by the primary
// pattern; the second submission consumes the first result and scales
// it by the secondary pattern.
cl_int VerifySerializedPass(std::vector<cl_int> &first_enqueue_output,
                            std::vector<cl_int> &second_enqueue_output)
{
    const cl_int expected_first = pattern_fill * pattern_pri;
    const cl_int expected_second = expected_first * pattern_sec;
    for (size_t elem = 0; elem < num_elements; elem++)
    {
        CHECK_VERIFICATION_ERROR(expected_first,
                                 first_enqueue_output[elem], elem);
        CHECK_VERIFICATION_ERROR(expected_second,
                                 second_enqueue_output[elem], elem);
    }
    return CL_SUCCESS;
}
// Checks the results of the simultaneous-use test mode. Here the two
// submissions are independent: the first multiplies pattern_fill by
// pattern_pri, the second multiplies pattern_fill_2 by pattern_sec.
cl_int VerifySimultaneousPass(std::vector<cl_int> &first_enqueue_output,
                              std::vector<cl_int> &second_enqueue_output)
{
    const cl_int expected_first = pattern_fill * pattern_pri;
    const cl_int expected_second = pattern_fill_2 * pattern_sec;
    for (size_t elem = 0; elem < num_elements; elem++)
    {
        CHECK_VERIFICATION_ERROR(expected_first,
                                 first_enqueue_output[elem], elem);
        CHECK_VERIFICATION_ERROR(expected_second,
                                 second_enqueue_output[elem], elem);
    }
    return CL_SUCCESS;
}
cl_int Run() override cl_int Run() override
{ {
cl_int error = CL_SUCCESS; cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
if (simultaneous_use_support) std::vector<cl_int> first_enqueue_output(num_elements);
std::vector<cl_int> second_enqueue_output(num_elements);
if (simultaneous_use_requested)
{ {
// enqueue simultaneous command-buffers with out-of-order calls error = RunSimultaneousPass(first_enqueue_output,
error = RunSimultaneous(); second_enqueue_output);
test_error(error, "RunSimultaneous failed"); test_error(error, "RunSimultaneousPass failed");
} }
else else
{ {
// enqueue single command-buffer with out-of-order calls error =
error = RunSingle(); RunSerializedPass(first_enqueue_output, second_enqueue_output);
test_error(error, "RunSingle failed"); test_error(error, "RunSerializedPass failed");
} }
return CL_SUCCESS;
}
// Records the command-buffer used by RunSingle(): fill in_mem with
// pattern_pri, pre-fill out_mem with overwritten_pattern, then run the
// kernel once both fills have completed (via sync points), and finalize.
cl_int RecordCommandBuffer()
{
    cl_sync_point_khr sync_points[2];
    const cl_int pattern = pattern_pri;
    cl_int error = clCommandFillBufferKHR(
        work_command_buffer, nullptr, nullptr, in_mem, &pattern,
        sizeof(cl_int), 0, data_size(), 0, nullptr, &sync_points[0],
        nullptr);
    test_error(error, "clCommandFillBufferKHR failed");
    // Pre-fill the output with a sentinel pattern - presumably so a
    // kernel that fails to write is detectable; confirm against the
    // verification logic.
    error = clCommandFillBufferKHR(work_command_buffer, nullptr, nullptr,
                                   out_mem, &overwritten_pattern,
                                   sizeof(cl_int), 0, data_size(), 0,
                                   nullptr, &sync_points[1], nullptr);
    test_error(error, "clCommandFillBufferKHR failed");
    // Kernel dispatch depends on both fills; keep the mutable command
    // handle in `command` for later clUpdateMutableCommandsKHR calls.
    error = clCommandNDRangeKernelKHR(
        work_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
        &num_elements, nullptr, 2, sync_points, nullptr, &command);
    test_error(error, "clCommandNDRangeKernelKHR failed");
    error = clFinalizeCommandBufferKHR(work_command_buffer);
    test_error(error, "clFinalizeCommandBufferKHR failed");
    return CL_SUCCESS;
}
// Single-submission variant: enqueue the recorded command-buffer and
// verify out_mem, then update the kernel's output-buffer argument to a
// freshly created buffer, enqueue again, and verify the new buffer
// receives the same result (pattern_pri copied through the kernel).
cl_int RunSingle()
{
    cl_int error;
    error = RecordCommandBuffer();
    test_error(error, "RecordCommandBuffer failed");
    error = clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 0,
                                      nullptr, &single_event);
    test_error(error, "clEnqueueCommandBufferKHR failed");
    std::vector<cl_int> output_data(num_elements);
    // Blocking read gated on the command-buffer completion event.
    error =
        clEnqueueReadBuffer(work_queue, out_mem, CL_TRUE, 0, data_size(),
                            output_data.data(), 1, &single_event, nullptr);
    test_error(error, "clEnqueueReadBuffer failed");
    // Expected value is the in_mem fill pattern; the kernel source is
    // outside this view - presumably it copies input to output.
    for (size_t i = 0; i < num_elements; i++)
    {
        CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
    }
    // Fresh output buffer for the updated dispatch, sized with
    // buffer_size_multiplier to match the other test configurations.
    clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
                                              sizeof(cl_int) * num_elements
                                                  * buffer_size_multiplier,
                                              nullptr, &error);
    test_error(error, "clCreateBuffer failed");
    // Update only kernel argument 1 (the output buffer).
    cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
                                       &new_out_mem };
    cl_mutable_dispatch_arg_khr args[] = { arg_1 };
    cl_mutable_dispatch_config_khr dispatch_config{
        command,
        1 /* num_args */,
        0 /* num_svm_arg */,
        0 /* num_exec_infos */,
        0 /* work_dim - 0 means no change to dimensions */,
        args /* arg_list */,
        nullptr /* arg_svm_list - nullptr means no change*/,
        nullptr /* exec_info_list */,
        nullptr /* global_work_offset */,
        nullptr /* global_work_size */,
        nullptr /* local_work_size */
    };
    cl_uint num_configs = 1;
    cl_command_buffer_update_type_khr config_types[1] = {
        CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
    };
    const void* configs[1] = { &dispatch_config };
    error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
                                       config_types, configs);
    test_error(error, "clUpdateMutableCommandsKHR failed");
    // Second submission; single_event is re-assigned to the new
    // completion event (the wrapper releases the previous one).
    error = clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 0,
                                      nullptr, &single_event);
    test_error(error, "clEnqueueCommandBufferKHR failed");
    error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_TRUE, 0,
                                data_size(), output_data.data(), 1,
                                &single_event, nullptr);
    test_error(error, "clEnqueueReadBuffer failed");
    for (size_t i = 0; i < num_elements; i++)
    {
        CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
    }
    return CL_SUCCESS;
}
// Records the command-buffer shared by the simultaneous passes: fill the
// whole (multiplier-sized) in_mem buffer, run kernel_fill to initialise
// only the relevant part of the output, then run the main kernel after
// both predecessors complete.
cl_int RecordSimultaneousCommandBuffer()
{
    cl_sync_point_khr sync_points[2];
    // for both simultaneous passes this call will fill entire in_mem buffer
    cl_int error = clCommandFillBufferKHR(
        work_command_buffer, nullptr, nullptr, in_mem, &pattern_pri,
        sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
        &sync_points[0], nullptr);
    test_error(error, "clCommandFillBufferKHR failed");
    // to avoid overwriting the entire result buffer instead of filling
    // only relevant part this additional kernel was introduced
    // NOTE(review): &command is passed here AND for the next dispatch, so
    // the fill-kernel's mutable handle is immediately overwritten below.
    // Only the main kernel's handle is ever updated - confirm the first
    // out-parameter should not simply be nullptr.
    error = clCommandNDRangeKernelKHR(
        work_command_buffer, nullptr, nullptr, kernel_fill, 1, nullptr,
        &num_elements, nullptr, 0, nullptr, &sync_points[1], &command);
    test_error(error, "clCommandNDRangeKernelKHR failed");
    // Main kernel waits on both the input fill and the output-init kernel.
    error = clCommandNDRangeKernelKHR(
        work_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
        &num_elements, nullptr, 2, sync_points, nullptr, &command);
    test_error(error, "clCommandNDRangeKernelKHR failed");
    error = clFinalizeCommandBufferKHR(work_command_buffer);
    test_error(error, "clFinalizeCommandBufferKHR failed");
    return CL_SUCCESS;
}
// Per-pass bookkeeping for the simultaneous-use test: the element offset
// this pass writes/reads at, the two read-back result vectors (before
// and after the mutable-dispatch update), and the events chaining the
// pass's commands together.
struct SimulPassData
{
    // Element offset into the shared off_mem/out_mem buffers.
    cl_int offset;
    // Read-back of out_mem from the first (non-updated) submission.
    std::vector<cl_int> output_buffer;
    // Read-back of the new output buffer after the argument update.
    std::vector<cl_int> updated_output_buffer;
    // 0:user event, 1:offset-buffer fill event, 2:kernel done event
    clEventWrapper wait_events[3];
};
// Enqueues one simultaneous pass: write this pass's offset into off_mem,
// submit the command-buffer, read back out_mem, then redirect the
// dispatch's output argument to a freshly created buffer, submit again,
// and read that buffer back. All reads are non-blocking; the caller
// finishes the queue before verification.
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{
    cl_int error = CL_SUCCESS;
    // Lazily create the shared user event gating every pass; it is
    // signalled once in RunSimultaneous() after all passes are enqueued.
    if (!user_event)
    {
        user_event = clCreateUserEvent(context, &error);
        test_error(error, "clCreateUserEvent failed");
    }
    pd.wait_events[0] = user_event;
    // filling offset buffer must wait for previous pass completeness
    error = clEnqueueFillBuffer(
        work_queue, off_mem, &pd.offset, sizeof(cl_int), 0, sizeof(cl_int),
        (wait_pass_event != nullptr ? 1 : 0),
        (wait_pass_event != nullptr ? &wait_pass_event : nullptr),
        &pd.wait_events[1]);
    test_error(error, "clEnqueueFillBuffer failed");
    // command buffer execution must wait for two wait-events
    error =
        clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 2,
                                  &pd.wait_events[0], &pd.wait_events[2]);
    test_error(error, "clEnqueueCommandBufferKHR failed");
    // Non-blocking read of this pass's slice of out_mem.
    error = clEnqueueReadBuffer(work_queue, out_mem, CL_FALSE,
                                pd.offset * sizeof(cl_int), data_size(),
                                pd.output_buffer.data(), 1,
                                &pd.wait_events[2], nullptr);
    test_error(error, "clEnqueueReadBuffer failed");
    clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
                                              sizeof(cl_int) * num_elements
                                                  * buffer_size_multiplier,
                                              nullptr, &error);
    test_error(error, "clCreateBuffer failed");
    // Retain new output memory object until the end of the test.
    retained_output_buffers.push_back(new_out_mem);
    // Update kernel argument 1 to point at the new output buffer.
    cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
                                       &new_out_mem };
    cl_mutable_dispatch_arg_khr args[] = { arg_1 };
    cl_mutable_dispatch_config_khr dispatch_config{
        command,
        1 /* num_args */,
        0 /* num_svm_arg */,
        0 /* num_exec_infos */,
        0 /* work_dim - 0 means no change to dimensions */,
        args /* arg_list */,
        nullptr /* arg_svm_list - nullptr means no change*/,
        nullptr /* exec_info_list */,
        nullptr /* global_work_offset */,
        nullptr /* global_work_size */,
        nullptr /* local_work_size */
    };
    cl_uint num_configs = 1;
    cl_command_buffer_update_type_khr config_types[1] = {
        CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
    };
    const void* configs[1] = { &dispatch_config };
    error = clUpdateMutableCommandsKHR(work_command_buffer, num_configs,
                                       config_types, configs);
    test_error(error, "clUpdateMutableCommandsKHR failed");
    // command buffer execution must wait for two wait-events
    // NOTE(review): pd.wait_events[2] is re-assigned as this second
    // submission's completion event, replacing the first submission's
    // event (the wrapper releases the old one).
    error =
        clEnqueueCommandBufferKHR(0, nullptr, work_command_buffer, 2,
                                  &pd.wait_events[0], &pd.wait_events[2]);
    test_error(error, "clEnqueueCommandBufferKHR failed");
    error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE,
                                pd.offset * sizeof(cl_int), data_size(),
                                pd.updated_output_buffer.data(), 1,
                                &pd.wait_events[2], nullptr);
    test_error(error, "clEnqueueReadBuffer failed");
    return CL_SUCCESS;
}
cl_int RunSimultaneous()
{
cl_int error = RecordSimultaneousCommandBuffer();
test_error(error, "RecordSimultaneousCommandBuffer failed");
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ 0, std::vector<cl_int>(num_elements),
std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements),
std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueueSimultaneousPass failed");
wait_pass_event = pass.wait_events[2];
}
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(work_queue); error = clFinish(work_queue);
test_error(error, "clFinish failed"); test_error(error, "clFinish failed");
// verify the result buffers // verify the result buffers
auto& first_pass_output = simul_passes[0].output_buffer; if (simultaneous_use_requested)
auto& first_pass_updated_output = simul_passes[0].updated_output_buffer;
auto& second_pass_output = simul_passes[1].output_buffer;
auto& second_pass_updated_output =
simul_passes[1].updated_output_buffer;
for (size_t i = 0; i < num_elements; i++)
{ {
// First pass: error = VerifySimultaneousPass(first_enqueue_output,
// Before updating, out_mem is copied from in_mem (pattern_pri) second_enqueue_output);
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_output[i], i); test_error(error, "VerifySimultaneousPass failed");
// After updating, new_out_mem is copied from in_mem (pattern_pri) }
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_updated_output[i], else
i); {
// Second pass: error = VerifySerializedPass(first_enqueue_output,
// Before updating, out_mem is filled with overwritten_pattern second_enqueue_output);
CHECK_VERIFICATION_ERROR(overwritten_pattern, second_pass_output[i], test_error(error, "VerifySerializedPass failed");
i);
// After updating, new_out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, second_pass_updated_output[i],
i);
} }
return CL_SUCCESS; return CL_SUCCESS;
@@ -440,22 +400,20 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
clCommandQueueWrapper work_queue; clCommandQueueWrapper work_queue;
clCommandBufferWrapper work_command_buffer; clCommandBufferWrapper work_command_buffer;
clEventWrapper user_event; clKernelWrapper kernel_mul;
clEventWrapper single_event; clProgramWrapper program_mul;
clEventWrapper wait_pass_event;
clKernelWrapper kernel_fill; clMemWrapper new_out_mem, new_in_mem;
clProgramWrapper program_fill;
std::vector<clMemWrapper> retained_output_buffers;
const size_t test_global_work_size = 3 * sizeof(cl_int);
const cl_int pattern_pri = 42; const cl_int pattern_pri = 42;
const cl_int pattern_sec = 0xACDC;
const cl_int pattern_fill = 0xA;
const cl_int pattern_fill_2 = -3;
const cl_int overwritten_pattern = 0xACDC;
cl_mutable_command_khr command; cl_mutable_command_khr command;
}; };
template <bool simultaneous_use_request>
struct CrossQueueSimultaneousMutableDispatchTest struct CrossQueueSimultaneousMutableDispatchTest
: public BasicMutableCommandBufferTest : public BasicMutableCommandBufferTest
{ {
@@ -463,9 +421,9 @@ struct CrossQueueSimultaneousMutableDispatchTest
cl_context context, cl_context context,
cl_command_queue queue) cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue), : BasicMutableCommandBufferTest(device, context, queue),
queue_sec(nullptr), command(nullptr) queue_sec(nullptr), new_out_mem(nullptr), command(nullptr)
{ {
simultaneous_use_requested = true; simultaneous_use_requested = simultaneous_use_request;
} }
cl_int SetUpKernel() override cl_int SetUpKernel() override
@@ -488,6 +446,11 @@ struct CrossQueueSimultaneousMutableDispatchTest
kernel = clCreateKernel(program, "fill", &error); kernel = clCreateKernel(program, "fill", &error);
test_error(error, "Failed to create copy kernel"); test_error(error, "Failed to create copy kernel");
new_out_mem =
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_int) * num_elements, nullptr, &error);
test_error(error, "clCreateBuffer failed");
return CL_SUCCESS; return CL_SUCCESS;
} }
@@ -530,24 +493,18 @@ struct CrossQueueSimultaneousMutableDispatchTest
sizeof(mutable_capabilities), &mutable_capabilities, nullptr) sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
return !simultaneous_use_support || !mutable_support; return (simultaneous_use_requested && !simultaneous_use_support)
|| !mutable_support;
} }
cl_int Run() override cl_int Run() override
{ {
// record command buffer
cl_int pattern = 0;
cl_int error = clCommandFillBufferKHR(
command_buffer, nullptr, nullptr, out_mem, &pattern, sizeof(cl_int),
0, data_size(), 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandFillBufferKHR failed");
cl_command_properties_khr props[] = { cl_command_properties_khr props[] = {
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
}; };
error = clCommandNDRangeKernelKHR( cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, &command); nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed"); test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -555,16 +512,15 @@ struct CrossQueueSimultaneousMutableDispatchTest
error = clFinalizeCommandBufferKHR(command_buffer); error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed"); test_error(error, "clFinalizeCommandBufferKHR failed");
// enqueue command buffer to default queue // If we are testing not using simultaneous-use then we need to use
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, // an event to serialize the execution order to the command-buffer
nullptr, nullptr); // submission to each queue.
clEventWrapper E;
error = clEnqueueCommandBufferKHR(
0, nullptr, command_buffer, 0, nullptr,
(simultaneous_use_requested ? nullptr : &E));
test_error(error, "clEnqueueCommandBufferKHR failed"); test_error(error, "clEnqueueCommandBufferKHR failed");
// update mutable parameters
clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
data_size(), nullptr, &error);
test_error(error, "clCreateBuffer failed");
cl_mutable_dispatch_arg_khr arg_0{ 0, sizeof(cl_int), &pattern_sec }; cl_mutable_dispatch_arg_khr arg_0{ 0, sizeof(cl_int), &pattern_sec };
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem }; &new_out_mem };
@@ -594,30 +550,35 @@ struct CrossQueueSimultaneousMutableDispatchTest
test_error(error, "clUpdateMutableCommandsKHR failed"); test_error(error, "clUpdateMutableCommandsKHR failed");
// enqueue command buffer to non-default queue // enqueue command buffer to non-default queue
error = clEnqueueCommandBufferKHR(1, &queue_sec, command_buffer, 0, error = clEnqueueCommandBufferKHR(
nullptr, nullptr); 1, &queue_sec, command_buffer, (simultaneous_use_requested ? 0 : 1),
(simultaneous_use_requested ? nullptr : &E), nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed"); test_error(error, "clEnqueueCommandBufferKHR failed");
error = clFinish(queue_sec);
test_error(error, "clFinish failed");
// read result of command buffer execution // read result of command buffer execution
std::vector<cl_int> output_data(num_elements); std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
std::vector<cl_int> sec_output_data(num_elements);
error = error =
clEnqueueReadBuffer(queue_sec, new_out_mem, CL_TRUE, 0, data_size(), clEnqueueReadBuffer(queue_sec, new_out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr); sec_output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed"); test_error(error, "clEnqueueReadBuffer failed");
// verify the result // verify the result
for (size_t i = 0; i < num_elements; i++) for (size_t i = 0; i < num_elements; i++)
{ {
CHECK_VERIFICATION_ERROR(pattern_sec, output_data[i], i); CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
CHECK_VERIFICATION_ERROR(pattern_sec, sec_output_data[i], i);
} }
return CL_SUCCESS; return CL_SUCCESS;
} }
clCommandQueueWrapper queue_sec; clCommandQueueWrapper queue_sec;
clMemWrapper new_out_mem;
const cl_int pattern_pri = 42; const cl_int pattern_pri = 42;
const cl_int pattern_sec = 0xACDC; const cl_int pattern_sec = 0xACDC;
cl_mutable_command_khr command; cl_mutable_command_khr command;
@@ -637,14 +598,26 @@ REGISTER_TEST(mutable_dispatch_simultaneous_out_of_order)
device, context, queue, num_elements); device, context, queue, num_elements);
} }
REGISTER_TEST(mutable_dispatch_in_order)
{
return MakeAndRunTest<SimultaneousMutableDispatchTest<false, false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_simultaneous_in_order) REGISTER_TEST(mutable_dispatch_simultaneous_in_order)
{ {
return MakeAndRunTest<SimultaneousMutableDispatchTest<true, false>>( return MakeAndRunTest<SimultaneousMutableDispatchTest<true, false>>(
device, context, queue, num_elements); device, context, queue, num_elements);
} }
REGISTER_TEST(mutable_dispatch_simultaneous_cross_queue) REGISTER_TEST(mutable_dispatch_cross_queue)
{ {
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest>( return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest<false>>(
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_simultaneous_cross_queue)
{
return MakeAndRunTest<CrossQueueSimultaneousMutableDispatchTest<true>>(
device, context, queue, num_elements); device, context, queue, num_elements);
} }

View File

@@ -79,11 +79,7 @@ struct CommandBufferEventSync : public BasicCommandBufferTest
: BasicCommandBufferTest(device, context, queue), : BasicCommandBufferTest(device, context, queue),
command_buffer_sec(this), kernel_sec(nullptr), in_mem_sec(nullptr), command_buffer_sec(this), kernel_sec(nullptr), in_mem_sec(nullptr),
out_mem_sec(nullptr), off_mem_sec(nullptr), test_event(nullptr) out_mem_sec(nullptr), off_mem_sec(nullptr), test_event(nullptr)
{ {}
simultaneous_use_requested =
(event_mode == EventMode::RET_COMBUF_WAIT_FOR_COMBUF) ? true
: false;
}
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
cl_int SetUpKernel() override cl_int SetUpKernel() override
@@ -159,9 +155,6 @@ struct CommandBufferEventSync : public BasicCommandBufferTest
{ {
if (BasicCommandBufferTest::Skip()) return true; if (BasicCommandBufferTest::Skip()) return true;
if (simultaneous_use_requested && !simultaneous_use_support)
return true;
if (out_of_order_requested && !out_of_order_support) return true; if (out_of_order_requested && !out_of_order_support) return true;
return false; return false;

View File

@@ -48,6 +48,39 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
: BasicCommandBufferTest(device, context, queue) : BasicCommandBufferTest(device, context, queue)
{} {}
bool Skip() override
{
if (BasicCommandBufferTest::Skip()) return true;
if (test_mode == CombufInfoTestMode::CITM_PROP_ARRAY)
{
return !simultaneous_use_support
|| !(is_extension_available(
device,
CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME));
}
return false;
}
cl_int SetUp(int elements) override
{
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp() failed");
if (test_mode == CombufInfoTestMode::CITM_PROP_ARRAY)
{
cl_command_buffer_properties_khr properties[3] = {
CL_COMMAND_BUFFER_FLAGS_KHR,
CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR, 0
};
command_buffer =
clCreateCommandBufferKHR(1, &queue, properties, &error);
test_error(error, "clCreateCommandBufferKHR failed");
}
return CL_SUCCESS;
}
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
cl_int Run() override cl_int Run() override
{ {
@@ -237,33 +270,6 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR); error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
test_error(error, "verify_state failed"); test_error(error, "verify_state failed");
error = clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
clEventWrapper trigger_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
clEventWrapper execute_event;
// enqueued command buffer blocked on user event
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&trigger_event, &execute_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
// execute command buffer
cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
test_error(error, "verify_state failed");
test_error(signal_error, "clSetUserEventStatus failed");
error = clWaitForEvents(1, &execute_event);
test_error(error, "Unable to wait for execute event");
// verify executable state
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
test_error(error, "verify_state failed");
return CL_SUCCESS; return CL_SUCCESS;
} }

View File

@@ -21,11 +21,9 @@
namespace { namespace {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// out-of-order tests for cl_khr_command_buffer which handles below cases: // Tests for cl_khr_command_buffer which handles submitting a command-buffer to
// -test case for out-of-order command-buffer // an out-of-order queue.
// -test an out-of-order command-buffer with simultaneous use
template <bool simultaneous_request>
struct OutOfOrderTest : public BasicCommandBufferTest struct OutOfOrderTest : public BasicCommandBufferTest
{ {
OutOfOrderTest(cl_device_id device, cl_context context, OutOfOrderTest(cl_device_id device, cl_context context,
@@ -35,18 +33,11 @@ struct OutOfOrderTest : public BasicCommandBufferTest
user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr), user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr),
program_fill(nullptr) program_fill(nullptr)
{ {
simultaneous_use_requested = simultaneous_request; buffer_size_multiplier = 2; // two enqueues of command-buffer
if (simultaneous_request) buffer_size_multiplier = 2;
} }
//--------------------------------------------------------------------------
cl_int SetUpKernel() override cl_int SetUpKernel() override
{ {
// if device doesn't support simultaneous use which was requested
// we can skip creation of OCL resources
if (simultaneous_use_requested && !simultaneous_use_support)
return CL_SUCCESS;
cl_int error = BasicCommandBufferTest::SetUpKernel(); cl_int error = BasicCommandBufferTest::SetUpKernel();
test_error(error, "BasicCommandBufferTest::SetUpKernel failed"); test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
@@ -74,14 +65,8 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int SetUpKernelArgs() override cl_int SetUpKernelArgs() override
{ {
// if device doesn't support simultaneous use which was requested
// we can skip creation of OCL resources
if (simultaneous_use_requested && !simultaneous_use_support)
return CL_SUCCESS;
cl_int error = BasicCommandBufferTest::SetUpKernelArgs(); cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed"); test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed");
@@ -98,7 +83,6 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override cl_int SetUp(int elements) override
{ {
cl_int error = BasicCommandBufferTest::SetUp(elements); cl_int error = BasicCommandBufferTest::SetUp(elements);
@@ -108,110 +92,23 @@ struct OutOfOrderTest : public BasicCommandBufferTest
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
test_error(error, "Unable to create command queue to test with"); test_error(error, "Unable to create command queue to test with");
cl_command_buffer_properties_khr properties[3] = { out_of_order_command_buffer =
CL_COMMAND_BUFFER_FLAGS_KHR, 0, 0 clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
};
if (simultaneous_use_requested && simultaneous_use_support)
properties[1] = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
out_of_order_command_buffer = clCreateCommandBufferKHR(
1, &out_of_order_queue, properties, &error);
test_error(error, "clCreateCommandBufferKHR failed"); test_error(error, "clCreateCommandBufferKHR failed");
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
bool Skip() override bool Skip() override
{ {
if (BasicCommandBufferTest::Skip()) return true; if (BasicCommandBufferTest::Skip()) return true;
return !out_of_order_support;
if (!out_of_order_support
|| (simultaneous_use_requested && !simultaneous_use_support))
return true;
return false;
} }
//-------------------------------------------------------------------------- cl_int RecordCommandBuffer() const
cl_int Run() override
{
cl_int error = CL_SUCCESS;
if (simultaneous_use_support)
{
// enqueue simultaneous command-buffers with out-of-order calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
}
else
{
// enqueue single command-buffer with out-of-order calls
error = RunSingle();
test_error(error, "RunSingle failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer()
{ {
cl_sync_point_khr sync_points[2]; cl_sync_point_khr sync_points[2];
const cl_int pattern = pattern_pri; // fill entire in_mem buffer
cl_int error = clCommandFillBufferKHR(
out_of_order_command_buffer, nullptr, nullptr, in_mem, &pattern,
sizeof(cl_int), 0, data_size(), 0, nullptr, &sync_points[0],
nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
nullptr, out_mem, &overwritten_pattern,
sizeof(cl_int), 0, data_size(), 0,
nullptr, &sync_points[1], nullptr);
test_error(error, "clCommandFillBufferKHR failed");
error = clCommandNDRangeKernelKHR(
out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
&num_elements, nullptr, 2, sync_points, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 0, nullptr, &user_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
data_size(), output_data.data(), 1,
&user_event, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordSimultaneousCommandBuffer() const
{
cl_sync_point_khr sync_points[2];
// for both simultaneous passes this call will fill entire in_mem buffer
cl_int error = clCommandFillBufferKHR( cl_int error = clCommandFillBufferKHR(
out_of_order_command_buffer, nullptr, nullptr, in_mem, &pattern_pri, out_of_order_command_buffer, nullptr, nullptr, in_mem, &pattern_pri,
sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr, sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
@@ -236,79 +133,63 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//-------------------------------------------------------------------------- struct EnqueuePassData
struct SimulPassData
{ {
cl_int offset; cl_int offset;
std::vector<cl_int> output_buffer; std::vector<cl_int> output_buffer;
// 0:user event, 1:offset-buffer fill event, 2:kernel done event // 0: offset-buffer fill event, 2:kernel done event
clEventWrapper wait_events[3]; clEventWrapper wait_events[2];
}; };
//-------------------------------------------------------------------------- cl_int EnqueuePass(EnqueuePassData& pd)
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{ {
cl_int error = CL_SUCCESS;
if (!user_event)
{
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
pd.wait_events[0] = user_event;
// filling offset buffer must wait for previous pass completeness // filling offset buffer must wait for previous pass completeness
error = clEnqueueFillBuffer( cl_int error = clEnqueueFillBuffer(
out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0, out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0,
sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0), sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0),
(wait_pass_event != nullptr ? &wait_pass_event : nullptr), (wait_pass_event != nullptr ? &wait_pass_event : nullptr),
&pd.wait_events[1]); &pd.wait_events[0]);
test_error(error, "clEnqueueFillBuffer failed"); test_error(error, "clEnqueueFillBuffer failed");
// command buffer execution must wait for two wait-events // command buffer execution must wait for two wait-events
error = clEnqueueCommandBufferKHR( error = clEnqueueCommandBufferKHR(
0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0], 0, nullptr, out_of_order_command_buffer, 1, &pd.wait_events[0],
&pd.wait_events[2]); &pd.wait_events[1]);
test_error(error, "clEnqueueCommandBufferKHR failed"); test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE, error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(), pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 1, pd.output_buffer.data(), 1,
&pd.wait_events[2], nullptr); &pd.wait_events[1], nullptr);
test_error(error, "clEnqueueReadBuffer failed"); test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS; return CL_SUCCESS;
} }
//-------------------------------------------------------------------------- cl_int Run() override
cl_int RunSimultaneous()
{ {
cl_int error = RecordSimultaneousCommandBuffer(); cl_int error = RecordCommandBuffer();
test_error(error, "RecordSimultaneousCommandBuffer failed"); test_error(error, "RecordCommandBuffer failed");
cl_int offset = static_cast<cl_int>(num_elements); cl_int offset = static_cast<cl_int>(num_elements);
std::vector<EnqueuePassData> enqueue_passes = {
std::vector<SimulPassData> simul_passes = {
{ 0, std::vector<cl_int>(num_elements) }, { 0, std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements) } { offset, std::vector<cl_int>(num_elements) }
}; };
for (auto&& pass : simul_passes) for (auto&& pass : enqueue_passes)
{ {
error = EnqueueSimultaneousPass(pass); error = EnqueuePass(pass);
test_error(error, "EnqueueSimultaneousPass failed"); test_error(error, "EnqueuePass failed");
wait_pass_event = pass.wait_events[2]; wait_pass_event = pass.wait_events[1];
} }
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(out_of_order_queue); error = clFinish(out_of_order_queue);
test_error(error, "clFinish failed"); test_error(error, "clFinish failed");
// verify the result buffers // verify the result buffers
for (auto&& pass : simul_passes) for (auto&& pass : enqueue_passes)
{ {
auto& res_data = pass.output_buffer; auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++) for (size_t i = 0; i < num_elements; i++)
@@ -320,7 +201,6 @@ struct OutOfOrderTest : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
clCommandQueueWrapper out_of_order_queue; clCommandQueueWrapper out_of_order_queue;
clCommandBufferWrapper out_of_order_command_buffer; clCommandBufferWrapper out_of_order_command_buffer;
@@ -338,12 +218,5 @@ struct OutOfOrderTest : public BasicCommandBufferTest
REGISTER_TEST(out_of_order) REGISTER_TEST(out_of_order)
{ {
return MakeAndRunTest<OutOfOrderTest<false>>(device, context, queue, return MakeAndRunTest<OutOfOrderTest>(device, context, queue, num_elements);
num_elements);
}
REGISTER_TEST(simultaneous_out_of_order)
{
return MakeAndRunTest<OutOfOrderTest<true>>(device, context, queue,
num_elements);
} }

View File

@@ -0,0 +1,321 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "basic_command_buffer.h"
#include <vector>
namespace {
////////////////////////////////////////////////////////////////////////////////
// Tests for multiple sequential submissions of a command-buffer without a
// blocking wait between them, but using the following mechanisms to serialize
// execution of the submissions.
// * In-order queue dependencies
// * Event dependencies in command-buffer submissions to an out-of-order queue
// * Barrier submissions between command-buffer submissions to an out-of-order
// queue
// Base class that individual test fixtures are derived from
// Base fixture for the pipelined-submission tests. Builds two kernels
// ("increment" and "mul_by_val") that both operate on out_mem, and records a
// command-buffer containing the increment kernel. Derived fixtures enqueue
// that command-buffer twice around one mul_by_val launch, serialized by
// different mechanisms, and verify out_mem ends up as pattern + 1
// (0 -> increment -> 1 -> *pattern -> pattern -> increment -> pattern + 1).
struct CommandBufferPipelined : public BasicCommandBufferTest
{
    CommandBufferPipelined(cl_device_id device, cl_context context,
                           cl_command_queue queue)
        : BasicCommandBufferTest(device, context, queue)
    {}

    // Compile the two test kernels from source and create kernel objects.
    cl_int SetUpKernel() override
    {
        const char* mul_kernel_str =
            R"(
            __kernel void mul_by_val(int in, __global int* data)
            {
                size_t id = get_global_id(0);
                data[id] *= in;
            }

            __kernel void increment(__global int* data)
            {
                size_t id = get_global_id(0);
                data[id]++;
            })";
        cl_int error = create_single_kernel_helper_create_program(
            context, &program, 1, &mul_kernel_str);
        test_error(error, "Failed to create program with source");

        error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
        test_error(error, "Failed to build program");

        mul_kernel = clCreateKernel(program, "mul_by_val", &error);
        test_error(error, "Failed to create mul_by_val kernel");

        inc_kernel = clCreateKernel(program, "increment", &error);
        test_error(error, "Failed to create increment kernel");

        return CL_SUCCESS;
    }

    // Allocate the shared output buffer and bind both kernels' arguments to it.
    cl_int SetUpKernelArgs() override
    {
        cl_int error = CL_SUCCESS;
        out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                 num_elements * buffer_size_multiplier
                                     * sizeof(cl_int),
                                 nullptr, &error);
        test_error(error, "clCreateBuffer failed");

        cl_int val_arg = pattern;
        error = clSetKernelArg(mul_kernel, 0, sizeof(cl_int), &val_arg);
        test_error(error, "clSetKernelArg failed");

        error = clSetKernelArg(mul_kernel, 1, sizeof(out_mem), &out_mem);
        test_error(error, "clSetKernelArg failed");

        error = clSetKernelArg(inc_kernel, 0, sizeof(out_mem), &out_mem);
        test_error(error, "clSetKernelArg failed");

        return CL_SUCCESS;
    }

    // Record the increment kernel into cmd_buf and finalize it, then
    // zero-initialize out_mem (blocking on clFinish) so each Run() starts
    // from a known buffer state.
    cl_int RecordCommandBuffer(clCommandBufferWrapper& cmd_buf)
    {
        cl_int error = clCommandNDRangeKernelKHR(
            cmd_buf, nullptr, nullptr, inc_kernel, 1, nullptr, &num_elements,
            nullptr, 0, nullptr, nullptr, nullptr);
        test_error(error, "clCommandNDRangeKernelKHR failed");

        error = clFinalizeCommandBufferKHR(cmd_buf);
        test_error(error, "clFinalizeCommandBufferKHR failed");

        // Zero initialize buffer before starting test
        cl_int zero_pattern = 0;
        error =
            clEnqueueFillBuffer(queue, out_mem, &zero_pattern, sizeof(cl_int),
                                0, data_size(), 0, nullptr, nullptr);
        test_error(error, "clEnqueueFillBuffer failed");

        error = clFinish(queue);
        test_error(error, "clFinish failed");

        return CL_SUCCESS;
    }

    // Multiplier applied by mul_by_val; final expected value is pattern + 1.
    const cl_int pattern = 42;
    clKernelWrapper inc_kernel = nullptr;
    clKernelWrapper mul_kernel = nullptr;
};
// Serializes the command-buffer -> kernel -> command-buffer pipeline purely
// through the implicit ordering guarantees of the default in-order queue;
// no events or barriers are used, and the final read is blocking.
struct InOrderPipelined : public CommandBufferPipelined
{
    InOrderPipelined(cl_device_id device, cl_context context,
                     cl_command_queue queue)
        : CommandBufferPipelined(device, context, queue)
    {}

    cl_int Run() override
    {
        cl_int error = RecordCommandBuffer(command_buffer);
        test_error(error, "RecordCommandBuffer failed");

        // First submission: out_mem goes from 0 to 1.
        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
                                          nullptr, nullptr);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        // Multiply by pattern; ordered after the first submission by the
        // in-order queue semantics.
        error =
            clEnqueueNDRangeKernel(queue, mul_kernel, 1, nullptr, &num_elements,
                                   nullptr, 0, nullptr, nullptr);
        test_error(error, "clEnqueueNDRangeKernel failed");

        // Second submission: increments again, yielding pattern + 1.
        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
                                          nullptr, nullptr);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        std::vector<cl_int> output_data(num_elements);
        error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
                                    output_data.data(), 0, nullptr, nullptr);
        test_error(error, "clEnqueueReadBuffer failed");

        // Verify
        const cl_int ref = pattern + 1;
        for (size_t i = 0; i < num_elements; i++)
        {
            CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
        }

        return CL_SUCCESS;
    }
};
// Serializes the pipeline on an out-of-order queue by chaining explicit
// event dependencies: each enqueue waits on the event signalled by the
// previous one.
struct EventPipelined : public CommandBufferPipelined
{
    EventPipelined(cl_device_id device, cl_context context,
                   cl_command_queue queue)
        : CommandBufferPipelined(device, context, queue),
          out_of_order_queue(nullptr), out_of_order_command_buffer(this)
    {}

    // Skip when the device does not support out-of-order queues.
    bool Skip() override
    {
        return CommandBufferPipelined::Skip() || !out_of_order_support;
    }

    // Create the out-of-order queue and a command-buffer targeting it.
    cl_int SetUp(int elements) override
    {
        cl_int error = CommandBufferPipelined::SetUp(elements);
        test_error(error, "EventPipelined::SetUp failed");

        out_of_order_queue = clCreateCommandQueue(
            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
        test_error(error, "Unable to create command queue to test with");

        out_of_order_command_buffer =
            clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
        test_error(error, "clCreateCommandBufferKHR failed");

        return CL_SUCCESS;
    }

    cl_int Run() override
    {
        cl_int error = RecordCommandBuffer(out_of_order_command_buffer);
        test_error(error, "RecordCommandBuffer failed");

        // First submission signals events[0].
        error = clEnqueueCommandBufferKHR(
            0, nullptr, out_of_order_command_buffer, 0, nullptr, &events[0]);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        // Kernel waits on events[0], signals events[1].
        error = clEnqueueNDRangeKernel(out_of_order_queue, mul_kernel, 1,
                                       nullptr, &num_elements, nullptr, 1,
                                       &events[0], &events[1]);
        test_error(error, "clEnqueueNDRangeKernel failed");

        // Second submission waits on events[1], signals events[2].
        error = clEnqueueCommandBufferKHR(
            0, nullptr, out_of_order_command_buffer, 1, &events[1], &events[2]);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        // Blocking read waits on events[2], completing the chain.
        std::vector<cl_int> output_data(num_elements);
        error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
                                    data_size(), output_data.data(), 1,
                                    &events[2], nullptr);
        test_error(error, "clEnqueueReadBuffer failed");

        // Verify
        const cl_int ref = pattern + 1;
        for (size_t i = 0; i < num_elements; i++)
        {
            CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
        }

        return CL_SUCCESS;
    }

    clCommandQueueWrapper out_of_order_queue;
    clCommandBufferWrapper out_of_order_command_buffer;
    // Completion events of the three serialized enqueues, in pipeline order.
    clEventWrapper events[3] = { nullptr, nullptr, nullptr };
};
// Serializes the pipeline on an out-of-order queue by enqueuing a barrier
// between each submission instead of using explicit event dependencies.
struct BarrierPipelined : public CommandBufferPipelined
{
    BarrierPipelined(cl_device_id device, cl_context context,
                     cl_command_queue queue)
        : CommandBufferPipelined(device, context, queue),
          out_of_order_queue(nullptr), out_of_order_command_buffer(this)
    {}

    // Skip when the device does not support out-of-order queues.
    bool Skip() override
    {
        return CommandBufferPipelined::Skip() || !out_of_order_support;
    }

    // Create the out-of-order queue and a command-buffer targeting it.
    cl_int SetUp(int elements) override
    {
        cl_int error = CommandBufferPipelined::SetUp(elements);
        // Fixed copy-paste: this fixture is BarrierPipelined, not
        // EventPipelined, so the failure message must name it correctly.
        test_error(error, "BarrierPipelined::SetUp failed");

        out_of_order_queue = clCreateCommandQueue(
            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
        test_error(error, "Unable to create command queue to test with");

        out_of_order_command_buffer =
            clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
        test_error(error, "clCreateCommandBufferKHR failed");

        return CL_SUCCESS;
    }

    cl_int Run() override
    {
        cl_int error = RecordCommandBuffer(out_of_order_command_buffer);
        test_error(error, "RecordCommandBuffer failed");

        // First submission, then a barrier to order the following kernel.
        error = clEnqueueCommandBufferKHR(
            0, nullptr, out_of_order_command_buffer, 0, nullptr, nullptr);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        error = clEnqueueBarrier(out_of_order_queue);
        test_error(error, "clEnqueueBarrier failed");

        error =
            clEnqueueNDRangeKernel(out_of_order_queue, mul_kernel, 1, nullptr,
                                   &num_elements, nullptr, 0, nullptr, nullptr);
        test_error(error, "clEnqueueNDRangeKernel failed");

        // Barrier before the second submission, and another before the read,
        // so the blocking read observes the fully pipelined result.
        error = clEnqueueBarrier(out_of_order_queue);
        test_error(error, "clEnqueueBarrier failed");

        error = clEnqueueCommandBufferKHR(
            0, nullptr, out_of_order_command_buffer, 0, nullptr, nullptr);
        test_error(error, "clEnqueueCommandBufferKHR failed");

        error = clEnqueueBarrier(out_of_order_queue);
        test_error(error, "clEnqueueBarrier failed");

        std::vector<cl_int> output_data(num_elements);
        error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
                                    data_size(), output_data.data(), 0, nullptr,
                                    nullptr);
        test_error(error, "clEnqueueReadBuffer failed");

        // Verify
        const cl_int ref = pattern + 1;
        for (size_t i = 0; i < num_elements; i++)
        {
            CHECK_VERIFICATION_ERROR(ref, output_data[i], i);
        }

        return CL_SUCCESS;
    }

    clCommandQueueWrapper out_of_order_queue;
    clCommandBufferWrapper out_of_order_command_buffer;
};
} // anonymous namespace
// Pipelined command-buffer submissions serialized by an in-order queue.
REGISTER_TEST(pipeline_in_order_deps)
{
    return MakeAndRunTest<InOrderPipelined>(device, context, queue,
                                            num_elements);
}
// Pipelined command-buffer submissions serialized by event dependencies on
// an out-of-order queue.
REGISTER_TEST(pipeline_event_deps)
{
    return MakeAndRunTest<EventPipelined>(device, context, queue, num_elements);
}
// Pipelined command-buffer submissions serialized by barriers on an
// out-of-order queue.
REGISTER_TEST(pipeline_barrier_deps)
{
    return MakeAndRunTest<BarrierPipelined>(device, context, queue,
                                            num_elements);
}

View File

@@ -44,27 +44,18 @@
namespace { namespace {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// printf tests for cl_khr_command_buffer which handles below cases: // Test for cl_khr_command_buffer which handles a command-buffer containing a
// -test cases for device side printf // printf kernel being repeatedly enqueued.
// -test cases for device side printf with a simultaneous use command-buffer
template <bool simul_use>
struct CommandBufferPrintfTest : public BasicCommandBufferTest struct CommandBufferPrintfTest : public BasicCommandBufferTest
{ {
CommandBufferPrintfTest(cl_device_id device, cl_context context, CommandBufferPrintfTest(cl_device_id device, cl_context context,
cl_command_queue queue) cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), : BasicCommandBufferTest(device, context, queue), file_descriptor(0)
trigger_event(nullptr), wait_event(nullptr), file_descriptor(0),
printf_use_support(false)
{ {
simultaneous_use_requested = simul_use; buffer_size_multiplier = num_test_iters;
if (simul_use)
{
buffer_size_multiplier = num_test_iters;
}
} }
//--------------------------------------------------------------------------
void ReleaseOutputStream(int fd) void ReleaseOutputStream(int fd)
{ {
fflush(stdout); fflush(stdout);
@@ -72,7 +63,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
close(fd); close(fd);
} }
//--------------------------------------------------------------------------
int AcquireOutputStream(int* error) int AcquireOutputStream(int* error)
{ {
int fd = streamDup(fileno(stdout)); int fd = streamDup(fileno(stdout));
@@ -85,7 +75,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return fd; return fd;
} }
//--------------------------------------------------------------------------
void GetAnalysisBuffer(std::stringstream& buffer) void GetAnalysisBuffer(std::stringstream& buffer)
{ {
std::ifstream fp(temp_filename, std::ios::in); std::ifstream fp(temp_filename, std::ios::in);
@@ -95,7 +84,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
} }
} }
//--------------------------------------------------------------------------
void PurgeTempFile() void PurgeTempFile()
{ {
std::ofstream ofs(temp_filename, std::ofstream ofs(temp_filename,
@@ -103,9 +91,10 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
ofs.close(); ofs.close();
} }
//--------------------------------------------------------------------------
bool Skip() override bool Skip() override
{ {
if (BasicCommandBufferTest::Skip()) return true;
// Query if device supports kernel printf use // Query if device supports kernel printf use
cl_device_command_buffer_capabilities_khr capabilities; cl_device_command_buffer_capabilities_khr capabilities;
cl_int error = cl_int error =
@@ -114,16 +103,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
test_error(error, test_error(error,
"Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR"); "Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
printf_use_support = const bool printf_use_support =
(capabilities & CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR) (capabilities & CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR)
!= 0; != 0;
if (!printf_use_support) return true; return !printf_use_support;
return BasicCommandBufferTest::Skip()
|| (simultaneous_use_requested && !simultaneous_use_support);
} }
//--------------------------------------------------------------------------
cl_int SetUpKernel() override cl_int SetUpKernel() override
{ {
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
@@ -153,14 +139,12 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
size_t data_size() const override size_t data_size() const override
{ {
return sizeof(cl_char) * num_elements * buffer_size_multiplier return sizeof(cl_char) * num_elements * buffer_size_multiplier
* max_pattern_length; * max_pattern_length;
} }
//--------------------------------------------------------------------------
cl_int SetUpKernelArgs() override cl_int SetUpKernelArgs() override
{ {
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
@@ -192,7 +176,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override cl_int SetUp(int elements) override
{ {
auto pcFname = get_temp_filename(); auto pcFname = get_temp_filename();
@@ -209,39 +192,10 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return BasicCommandBufferTest::SetUp(elements); return BasicCommandBufferTest::SetUp(elements);
} }
//--------------------------------------------------------------------------
cl_int Run() override
{
cl_int error = CL_SUCCESS;
// record command buffer with primary queue
error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
if (simultaneous_use_support)
{
// enqueue simultaneous command-buffers with printf calls
error = RunSimultaneous();
test_error(error, "RunSimultaneous failed");
}
else
{
// enqueue single command-buffer with printf calls
error = RunSingle();
test_error(error, "RunSingle failed");
}
std::remove(temp_filename.c_str());
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer() cl_int RecordCommandBuffer()
{ {
cl_int error = CL_SUCCESS; cl_int error = clCommandNDRangeKernelKHR(
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr); nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed"); test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -251,7 +205,6 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
#define test_error_release_stdout(errCode, msg) \ #define test_error_release_stdout(errCode, msg) \
{ \ { \
auto errCodeResult = errCode; \ auto errCodeResult = errCode; \
@@ -263,96 +216,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
} \ } \
} }
//-------------------------------------------------------------------------- struct EnqueuePassData
cl_int EnqueueSinglePass(const std::vector<cl_char>& pattern,
std::vector<cl_char>& output_data)
{
cl_int error = CL_SUCCESS;
auto in_mem_size = sizeof(cl_char) * pattern.size();
error = clEnqueueWriteBuffer(queue, in_mem, CL_TRUE, 0, in_mem_size,
&pattern[0], 0, nullptr, nullptr);
test_error(error, "clEnqueueWriteBuffer failed");
test_assert_error(pattern.size() - 1 <= CL_UINT_MAX,
"pattern.size() - 1 does not fit in a cl_uint");
cl_uint offset[] = { 0, static_cast<cl_uint>(pattern.size() - 1) };
error = clEnqueueWriteBuffer(queue, off_mem, CL_TRUE, 0, sizeof(offset),
offset, 0, nullptr, nullptr);
test_error(error, "clEnqueueWriteBuffer failed");
// redirect output stream to temporary file
file_descriptor = AcquireOutputStream(&error);
if (error != 0)
{
log_error("Error while redirection stdout to file");
return TEST_FAIL;
}
// enqueue command buffer with kernel containing printf command
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, &wait_event);
test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed");
fflush(stdout);
// Wait until kernel finishes its execution and (thus) the output
// printed from the kernel is immediately printed
error = clWaitForEvents(1, &wait_event);
test_error(error, "clWaitForEvents failed");
// output buffer contains pattern to be compared with printout
error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error_release_stdout(error, "clEnqueueReadBuffer failed");
error = clFinish(queue);
test_error_release_stdout(error, "clFinish failed");
ReleaseOutputStream(file_descriptor);
// copy content of temporary file into string stream
std::stringstream sstr;
GetAnalysisBuffer(sstr);
if (sstr.str().size() != num_elements * offset[1])
{
log_error("GetAnalysisBuffer failed\n");
return TEST_FAIL;
}
// verify the result - compare printout and output buffer
for (size_t i = 0; i < num_elements * offset[1]; i++)
{
CHECK_VERIFICATION_ERROR(sstr.str().at(i), output_data[i], i);
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSingle()
{
cl_int error = CL_SUCCESS;
std::vector<cl_char> output_data(num_elements * max_pattern_length);
for (unsigned i = 0; i < num_test_iters; i++)
{
unsigned pattern_length =
std::max(min_pattern_length, rand() % max_pattern_length);
char pattern_character = 'a' + rand() % 26;
std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
pattern[pattern_length] = '\0';
error = EnqueueSinglePass(pattern, output_data);
test_error(error, "EnqueueSinglePass failed");
output_data.assign(output_data.size(), 0);
PurgeTempFile();
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
{ {
// null terminated character buffer // null terminated character buffer
std::vector<cl_char> pattern; std::vector<cl_char> pattern;
@@ -361,8 +225,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
std::vector<cl_char> output_buffer; std::vector<cl_char> output_buffer;
}; };
//-------------------------------------------------------------------------- cl_int EnqueuePass(EnqueuePassData& pd)
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{ {
// write current pattern to device memory // write current pattern to device memory
auto in_mem_size = sizeof(cl_char) * pd.pattern.size(); auto in_mem_size = sizeof(cl_char) * pd.pattern.size();
@@ -377,15 +240,8 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
pd.offset, 0, nullptr, nullptr); pd.offset, 0, nullptr, nullptr);
test_error_release_stdout(error, "clEnqueueWriteBuffer failed"); test_error_release_stdout(error, "clEnqueueWriteBuffer failed");
// create user event to block simultaneous command buffers error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
if (!trigger_event) nullptr, nullptr);
{
trigger_event = clCreateUserEvent(context, &error);
test_error_release_stdout(error, "clCreateUserEvent failed");
}
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&trigger_event, nullptr);
test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed"); test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed");
// output buffer contains pattern to be compared with printout // output buffer contains pattern to be compared with printout
@@ -398,14 +254,14 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
cl_int Run() override
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
{ {
cl_int error = CL_SUCCESS; cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
cl_int offset = static_cast<cl_int>(num_elements * max_pattern_length); cl_int offset = static_cast<cl_int>(num_elements * max_pattern_length);
std::vector<SimulPassData> simul_passes(num_test_iters); std::vector<EnqueuePassData> enqueue_passes(num_test_iters);
const int pattern_chars_range = 26; const int pattern_chars_range = 26;
std::list<cl_char> pattern_chars; std::list<cl_char> pattern_chars;
@@ -413,7 +269,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
pattern_chars.push_back(cl_char('a' + i)); pattern_chars.push_back(cl_char('a' + i));
test_assert_error(pattern_chars.size() >= num_test_iters, test_assert_error(pattern_chars.size() >= num_test_iters,
"Number of simultaneous launches must be lower than " "Number of launches must be lower than "
"size of characters container"); "size of characters container");
cl_int total_pattern_coverage = 0; cl_int total_pattern_coverage = 0;
@@ -428,11 +284,12 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
std::vector<cl_char> pattern(pattern_length + 1, pattern_character); std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
pattern.back() = '\0'; pattern.back() = '\0';
simul_passes[i] = { pattern, enqueue_passes[i] = {
{ cl_int(i * offset), cl_int(pattern_length) }, pattern,
std::vector<cl_char>(num_elements { cl_int(i * offset), cl_int(pattern_length) },
* pattern_length) }; std::vector<cl_char>(num_elements * pattern_length)
total_pattern_coverage += simul_passes[i].output_buffer.size(); };
total_pattern_coverage += enqueue_passes[i].output_buffer.size();
pattern_chars.erase(it); pattern_chars.erase(it);
}; };
@@ -444,17 +301,14 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
return TEST_FAIL; return TEST_FAIL;
} }
// enqueue read/write and command buffer operations // enqueue read/write and command buffer operations, serialized
for (auto&& pass : simul_passes) // by in-order queue
for (auto&& pass : enqueue_passes)
{ {
error = EnqueueSimultaneousPass(pass); error = EnqueuePass(pass);
test_error_release_stdout(error, "EnqueueSimultaneousPass failed"); test_error_release_stdout(error, "EnqueuePass failed");
} }
// execute command buffers
error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
test_error_release_stdout(error, "clSetUserEventStatus failed");
// flush streams // flush streams
fflush(stdout); fflush(stdout);
@@ -477,13 +331,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
for (int i = 0; i < total_pattern_coverage; i++) for (int i = 0; i < total_pattern_coverage; i++)
counters_map[sstr.str().at(i)]++; counters_map[sstr.str().at(i)]++;
if (counters_map.size() != simul_passes.size()) if (counters_map.size() != enqueue_passes.size())
{ {
log_error("printout inconsistent with input data\n"); log_error("printout inconsistent with input data\n");
return TEST_FAIL; return TEST_FAIL;
} }
for (auto&& pass : simul_passes) for (auto&& pass : enqueue_passes)
{ {
auto& res_data = pass.output_buffer; auto& res_data = pass.output_buffer;
@@ -501,18 +355,13 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
} }
} }
std::remove(temp_filename.c_str());
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
clEventWrapper trigger_event = nullptr;
clEventWrapper wait_event = nullptr;
std::string temp_filename; std::string temp_filename;
int file_descriptor; int file_descriptor;
bool printf_use_support;
// specifies max test length for printf pattern // specifies max test length for printf pattern
const unsigned max_pattern_length = 6; const unsigned max_pattern_length = 6;
// specifies min test length for printf pattern // specifies min test length for printf pattern
@@ -523,14 +372,8 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
} // anonymous namespace } // anonymous namespace
REGISTER_TEST(basic_printf) REGISTER_TEST(printf)
{ {
return MakeAndRunTest<CommandBufferPrintfTest<false>>(device, context, return MakeAndRunTest<CommandBufferPrintfTest>(device, context, queue,
queue, num_elements); num_elements);
}
REGISTER_TEST(simultaneous_printf)
{
return MakeAndRunTest<CommandBufferPrintfTest<true>>(device, context, queue,
num_elements);
} }

View File

@@ -86,21 +86,17 @@ cl_int VerifyResult(const clEventWrapper& event)
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Command-buffer profiling test cases: // Command-buffer profiling test for enqueuing command-buffer twice and checking
// -all commands are recorded to a single command-queue // the profiling counters on the events returned.
// -profiling a command-buffer with simultaneous use
template <bool simultaneous_request>
struct CommandBufferProfiling : public BasicCommandBufferTest struct CommandBufferProfiling : public BasicCommandBufferTest
{ {
CommandBufferProfiling(cl_device_id device, cl_context context, CommandBufferProfiling(cl_device_id device, cl_context context,
cl_command_queue queue) cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), wait_event(nullptr) : BasicCommandBufferTest(device, context, queue)
{ {
simultaneous_use_requested = simultaneous_request; buffer_size_multiplier = 2; // Do two enqueues of command-buffer
if (simultaneous_request) buffer_size_multiplier = 2;
} }
//--------------------------------------------------------------------------
bool Skip() override bool Skip() override
{ {
if (BasicCommandBufferTest::Skip()) return true; if (BasicCommandBufferTest::Skip()) return true;
@@ -127,10 +123,9 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
"Queue property CL_QUEUE_PROFILING_ENABLE not supported \n"); "Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
return true; return true;
} }
return (simultaneous_use_requested && !simultaneous_use_support); return false;
} }
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override cl_int SetUp(int elements) override
{ {
@@ -156,37 +151,45 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
return BasicCommandBufferTest::SetUp(elements); return BasicCommandBufferTest::SetUp(elements);
} }
//-------------------------------------------------------------------------- struct EnqueuePassData
{
cl_int offset;
clEventWrapper query_event;
};
cl_int Run() override cl_int Run() override
{ {
cl_int error = CL_SUCCESS; cl_int error = RecordCommandBuffer();
// record command buffer
error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed"); test_error(error, "RecordCommandBuffer failed");
if (simultaneous_use_requested) cl_int offset = static_cast<cl_int>(num_elements);
std::vector<EnqueuePassData> enqueue_passes = {
{ 0, clEventWrapper() }, { offset, clEventWrapper() }
};
// In-order queue serialized the command-buffer submissions
for (auto&& pass : enqueue_passes)
{ {
// enqueue simultaneous command-buffers with profiling command queue error = EnqueuePass(pass);
error = RunSimultaneous(); test_error(error, "EnqueueSerializedPass failed");
test_error(error, "RunSimultaneous failed");
} }
else
error = clFinish(queue);
test_error(error, "clFinish failed");
for (auto&& pass : enqueue_passes)
{ {
// enqueue single command-buffer with profiling command queue error = VerifyResult(pass.query_event);
error = RunSingle(); test_error(error, "VerifyResult failed");
test_error(error, "RunSingle failed");
} }
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer() cl_int RecordCommandBuffer()
{ {
cl_int error = CL_SUCCESS; cl_int error = clCommandNDRangeKernelKHR(
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr); nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed"); test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -196,41 +199,7 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//-------------------------------------------------------------------------- cl_int EnqueuePass(EnqueuePassData& pd)
cl_int RunSingle()
{
cl_int error = CL_SUCCESS;
std::vector<cl_int> output_data(num_elements);
error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
clEventWrapper query_event;
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, &query_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
error = VerifyResult(query_event);
test_error(error, "VerifyResult failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
struct SimulPassData
{
cl_int offset;
std::vector<cl_int> output_buffer;
clEventWrapper query_event;
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{ {
cl_int error = clEnqueueFillBuffer( cl_int error = clEnqueueFillBuffer(
queue, out_mem, &pattern, sizeof(cl_int), queue, out_mem, &pattern, sizeof(cl_int),
@@ -241,59 +210,13 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
0, sizeof(cl_int), 0, nullptr, nullptr); 0, sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed"); test_error(error, "clEnqueueFillBuffer failed");
if (!wait_event) error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
{ nullptr, &pd.query_event);
wait_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&wait_event, &pd.query_event);
test_error(error, "clEnqueueCommandBufferKHR failed"); test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int RunSimultaneous()
{
cl_int error = CL_SUCCESS;
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ 0, std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueueSimultaneousPass failed");
}
error = clSetUserEventStatus(wait_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
for (auto&& pass : simul_passes)
{
error = VerifyResult(pass.query_event);
test_error(error, "VerifyResult failed");
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
clEventWrapper wait_event;
const cl_int pattern = 0xA; const cl_int pattern = 0xA;
}; };
@@ -356,19 +279,13 @@ struct CommandBufferSubstituteQueueProfiling : public BasicCommandBufferTest
}; };
} // anonymous namespace } // anonymous namespace
REGISTER_TEST(basic_profiling) REGISTER_TEST(profiling)
{ {
return MakeAndRunTest<CommandBufferProfiling<false>>(device, context, queue, return MakeAndRunTest<CommandBufferProfiling>(device, context, queue,
num_elements); num_elements);
} }
REGISTER_TEST(simultaneous_profiling) REGISTER_TEST(profiling_substitute_queue)
{
return MakeAndRunTest<CommandBufferProfiling<true>>(device, context, queue,
num_elements);
}
REGISTER_TEST(substitute_queue_profiling)
{ {
return MakeAndRunTest<CommandBufferSubstituteQueueProfiling>( return MakeAndRunTest<CommandBufferSubstituteQueueProfiling>(
device, context, queue, num_elements); device, context, queue, num_elements);

View File

@@ -23,21 +23,16 @@ namespace {
// Command-queue substitution tests which handles below cases: // Command-queue substitution tests which handles below cases:
// -substitution on queue without properties // -substitution on queue without properties
// -substitution on queue with properties // -substitution on queue with properties
// -simultaneous use queue substitution
template <bool prop_use, bool simul_use> template <bool prop_use>
struct SubstituteQueueTest : public BasicCommandBufferTest struct SubstituteQueueTest : public BasicCommandBufferTest
{ {
SubstituteQueueTest(cl_device_id device, cl_context context, SubstituteQueueTest(cl_device_id device, cl_context context,
cl_command_queue queue) cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), : BasicCommandBufferTest(device, context, queue),
properties_use_requested(prop_use), user_event(nullptr) properties_use_requested(prop_use)
{ {}
simultaneous_use_requested = simul_use;
if (simul_use) buffer_size_multiplier = 2;
}
//--------------------------------------------------------------------------
bool Skip() override bool Skip() override
{ {
if (properties_use_requested) if (properties_use_requested)
@@ -57,11 +52,9 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return true; return true;
} }
return BasicCommandBufferTest::Skip() return BasicCommandBufferTest::Skip();
|| (simultaneous_use_requested && !simultaneous_use_support);
} }
//--------------------------------------------------------------------------
cl_int SetUp(int elements) override cl_int SetUp(int elements) override
{ {
// By default command queue is created without properties, // By default command queue is created without properties,
@@ -81,7 +74,6 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return BasicCommandBufferTest::SetUp(elements); return BasicCommandBufferTest::SetUp(elements);
} }
//--------------------------------------------------------------------------
cl_int Run() override cl_int Run() override
{ {
// record command buffer with primary queue // record command buffer with primary queue
@@ -106,23 +98,14 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
test_error(error, "clCreateCommandQueue failed"); test_error(error, "clCreateCommandQueue failed");
} }
if (simultaneous_use_support)
{ // enqueue single command-buffer with substitute queue
// enque simultaneous command-buffers with substitute queue error = RunSingle(new_queue);
error = RunSimultaneous(new_queue); test_error(error, "RunSingle failed");
test_error(error, "RunSimultaneous failed");
}
else
{
// enque single command-buffer with substitute queue
error = RunSingle(new_queue);
test_error(error, "RunSingle failed");
}
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer() cl_int RecordCommandBuffer()
{ {
cl_int error = clCommandNDRangeKernelKHR( cl_int error = clCommandNDRangeKernelKHR(
@@ -135,14 +118,13 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int RunSingle(const cl_command_queue& q) cl_int RunSingle(const cl_command_queue& q)
{ {
cl_int error = CL_SUCCESS;
std::vector<cl_int> output_data(num_elements); std::vector<cl_int> output_data(num_elements);
error = clEnqueueFillBuffer(q, in_mem, &pattern_pri, sizeof(cl_int), 0, cl_int error =
data_size(), 0, nullptr, nullptr); clEnqueueFillBuffer(q, in_mem, &pattern_pri, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed"); test_error(error, "clEnqueueFillBuffer failed");
cl_command_queue queues[] = { q }; cl_command_queue queues[] = { q };
@@ -165,90 +147,8 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
struct SimulPassData
{
cl_int pattern;
cl_int offset;
cl_command_queue queue;
std::vector<cl_int> output_buffer;
};
//--------------------------------------------------------------------------
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{
cl_int error = clEnqueueFillBuffer(
pd.queue, in_mem, &pd.pattern, sizeof(cl_int),
pd.offset * sizeof(cl_int), data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error =
clEnqueueFillBuffer(pd.queue, off_mem, &pd.offset, sizeof(cl_int),
0, sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
if (!user_event)
{
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
cl_command_queue queues[] = { pd.queue };
error = clEnqueueCommandBufferKHR(1, queues, command_buffer, 1,
&user_event, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer(
pd.queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int),
data_size(), pd.output_buffer.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
cl_int RunSimultaneous(const cl_command_queue& q)
{
cl_int error = CL_SUCCESS;
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ pattern_pri, 0, q, std::vector<cl_int>(num_elements) },
{ pattern_sec, offset, q, std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
{
error = EnqueueSimultaneousPass(pass);
test_error(error, "EnqueuePass failed");
}
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
for (auto&& pass : simul_passes)
{
error = clFinish(pass.queue);
test_error(error, "clFinish failed");
auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pass.pattern, res_data[i], i);
}
}
return CL_SUCCESS;
}
//--------------------------------------------------------------------------
const cl_int pattern_pri = 0xB; const cl_int pattern_pri = 0xB;
const cl_int pattern_sec = 0xC;
bool properties_use_requested; bool properties_use_requested;
clEventWrapper user_event;
}; };
// Command-queue substitution tests which handles below cases: // Command-queue substitution tests which handles below cases:
@@ -397,20 +297,14 @@ struct QueueOrderTest : public BasicCommandBufferTest
REGISTER_TEST(queue_substitution) REGISTER_TEST(queue_substitution)
{ {
return MakeAndRunTest<SubstituteQueueTest<false, false>>( return MakeAndRunTest<SubstituteQueueTest<false>>(device, context, queue,
device, context, queue, num_elements); num_elements);
} }
REGISTER_TEST(properties_queue_substitution) REGISTER_TEST(queue_substitution_properties)
{ {
return MakeAndRunTest<SubstituteQueueTest<true, false>>( return MakeAndRunTest<SubstituteQueueTest<true>>(device, context, queue,
device, context, queue, num_elements); num_elements);
}
REGISTER_TEST(simultaneous_queue_substitution)
{
return MakeAndRunTest<SubstituteQueueTest<false, true>>(
device, context, queue, num_elements);
} }
REGISTER_TEST(queue_substitute_in_order) REGISTER_TEST(queue_substitute_in_order)

View File

@@ -22,25 +22,22 @@ namespace {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// clSetKernelArg tests for cl_khr_command_buffer which handles below cases: // clSetKernelArg tests for cl_khr_command_buffer which handles below cases:
// -test interactions of clSetKernelArg with command-buffers // -test interactions of clSetKernelArg after command-buffer finalize but
// -test interactions of clSetKernelArg on a command-buffer pending execution // before enqueue
// -test interactions of clSetKernelArg between command-buffer enqueue
template <bool simul_use> template <bool enqueue_test>
struct CommandBufferSetKernelArg : public BasicCommandBufferTest struct CommandBufferSetKernelArg : public BasicCommandBufferTest
{ {
CommandBufferSetKernelArg(cl_device_id device, cl_context context, CommandBufferSetKernelArg(cl_device_id device, cl_context context,
cl_command_queue queue) cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), trigger_event(nullptr) : BasicCommandBufferTest(device, context, queue)
{ {
simultaneous_use_requested = simul_use; if (enqueue_test) buffer_size_multiplier = 2;
if (simul_use) buffer_size_multiplier = 2;
} }
//--------------------------------------------------------------------------
cl_int SetUpKernel() override cl_int SetUpKernel() override
{ {
cl_int error = CL_SUCCESS;
const char* kernel_str = const char* kernel_str =
R"( R"(
__kernel void copy(int in, __global int* out, __global int* offset) __kernel void copy(int in, __global int* out, __global int* offset)
@@ -50,8 +47,8 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
out[ind] = in; out[ind] = in;
})"; })";
error = create_single_kernel_helper_create_program(context, &program, 1, cl_int error = create_single_kernel_helper_create_program(
&kernel_str); context, &program, 1, &kernel_str);
test_error(error, "Failed to create program with source"); test_error(error, "Failed to create program with source");
error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr); error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
@@ -63,7 +60,6 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int SetUpKernelArgs() override cl_int SetUpKernelArgs() override
{ {
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
@@ -99,15 +95,14 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int Run() override cl_int Run() override
{ {
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
if (simultaneous_use_requested) if (enqueue_test)
{ {
// enqueue simultaneous command-buffers with clSetKernelArg calls // enqueue command-buffers with clSetKernelArg calls in between
error = RunSimultaneous(); error = RunMultipleEnqueue();
test_error(error, "RunSimultaneous failed"); test_error(error, "RunMultipleEnqueue failed");
} }
else else
{ {
@@ -119,12 +114,9 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int RecordCommandBuffer() cl_int RecordCommandBuffer()
{ {
cl_int error = CL_SUCCESS; cl_int error = clCommandNDRangeKernelKHR(
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr); nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed"); test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -148,14 +140,12 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
cl_int RunSingle() cl_int RunSingle()
{ {
cl_int error = CL_SUCCESS;
std::vector<cl_int> output_data(num_elements); std::vector<cl_int> output_data(num_elements);
// record command buffer // record command buffer
error = RecordCommandBuffer(); cl_int error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed"); test_error(error, "RecordCommandBuffer failed");
const cl_int pattern_base = 0; const cl_int pattern_base = 0;
@@ -187,20 +177,16 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//-------------------------------------------------------------------------- struct EnqueuePassData
struct SimulPassData
{ {
cl_int pattern; cl_int pattern;
cl_int offset; cl_int offset;
std::vector<cl_int> output_buffer; std::vector<cl_int> output_buffer;
}; };
//-------------------------------------------------------------------------- cl_int RecordEnqueueCommandBuffer() const
cl_int RecordSimultaneousCommandBuffer() const
{ {
cl_int error = CL_SUCCESS; cl_int error = clCommandNDRangeKernelKHR(
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr); nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed"); test_error(error, "clCommandNDRangeKernelKHR failed");
@@ -210,8 +196,7 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//-------------------------------------------------------------------------- cl_int EnqueuePass(EnqueuePassData& pd)
cl_int EnqueueSimultaneousPass(SimulPassData& pd)
{ {
cl_int error = clEnqueueFillBuffer( cl_int error = clEnqueueFillBuffer(
queue, out_mem, &pd.pattern, sizeof(cl_int), queue, out_mem, &pd.pattern, sizeof(cl_int),
@@ -222,14 +207,8 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
0, sizeof(cl_int), 0, nullptr, nullptr); 0, sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed"); test_error(error, "clEnqueueFillBuffer failed");
if (!trigger_event) error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
{ nullptr, nullptr);
trigger_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
}
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&trigger_event, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed"); test_error(error, "clEnqueueCommandBufferKHR failed");
error = clEnqueueReadBuffer( error = clEnqueueReadBuffer(
@@ -240,49 +219,39 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//-------------------------------------------------------------------------- cl_int RunMultipleEnqueue()
cl_int RunSimultaneous()
{ {
cl_int error = CL_SUCCESS;
// record command buffer with primary queue // record command buffer with primary queue
error = RecordSimultaneousCommandBuffer(); cl_int error = RecordEnqueueCommandBuffer();
test_error(error, "RecordSimultaneousCommandBuffer failed"); test_error(error, "RecordEnqueeuCommandBuffer failed");
std::vector<SimulPassData> simul_passes = { cl_int offset = static_cast<cl_int>(num_elements);
{ 0, 0, std::vector<cl_int>(num_elements) } std::vector<EnqueuePassData> enqueue_passes = {
{ 0, 0, std::vector<cl_int>(num_elements) },
{ 1, offset, std::vector<cl_int>(num_elements) }
}; };
error = EnqueueSimultaneousPass(simul_passes.front()); for (auto&& pass : enqueue_passes)
test_error(error, "EnqueueSimultaneousPass 1 failed");
// changing kernel args at this point should have no effect,
// test will verify if clSetKernelArg didn't affect command-buffer
cl_int in_arg = pattern_sec;
error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 1, sizeof(out_mem_k2), &out_mem_k2);
test_error(error, "clSetKernelArg failed");
if (simultaneous_use_support)
{ {
cl_int offset = static_cast<cl_int>(num_elements); // changing kernel args at this point should have no effect,
simul_passes.push_back( // test will verify if clSetKernelArg didn't affect command-buffer
{ 1, offset, std::vector<cl_int>(num_elements) }); cl_int in_arg = pattern_sec;
error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
test_error(error, "clSetKernelArg failed");
error = EnqueueSimultaneousPass(simul_passes.back()); error = clSetKernelArg(kernel, 1, sizeof(out_mem_k2), &out_mem_k2);
test_error(error, "EnqueueSimultaneousPass 2 failed"); test_error(error, "clSetKernelArg failed");
error = EnqueuePass(pass);
test_error(error, "EnqueuePass failed");
} }
error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
error = clFinish(queue); error = clFinish(queue);
test_error(error, "clFinish failed"); test_error(error, "clFinish failed");
// verify the result buffer // verify the result buffer
for (auto&& pass : simul_passes) for (auto&& pass : enqueue_passes)
{ {
auto& res_data = pass.output_buffer; auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++) for (size_t i = 0; i < num_elements; i++)
@@ -294,9 +263,6 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
//--------------------------------------------------------------------------
clEventWrapper trigger_event = nullptr;
const cl_int pattern_pri = 2; const cl_int pattern_pri = 2;
const cl_int pattern_sec = 3; const cl_int pattern_sec = 3;
@@ -305,13 +271,13 @@ struct CommandBufferSetKernelArg : public BasicCommandBufferTest
} // anonymous namespace } // anonymous namespace
REGISTER_TEST(basic_set_kernel_arg) REGISTER_TEST(set_kernel_arg_after_finalize)
{ {
return MakeAndRunTest<CommandBufferSetKernelArg<false>>( return MakeAndRunTest<CommandBufferSetKernelArg<false>>(
device, context, queue, num_elements); device, context, queue, num_elements);
} }
REGISTER_TEST(pending_set_kernel_arg) REGISTER_TEST(set_kernel_arg_after_enqueue)
{ {
return MakeAndRunTest<CommandBufferSetKernelArg<true>>(device, context, return MakeAndRunTest<CommandBufferSetKernelArg<true>>(device, context,
queue, num_elements); queue, num_elements);

View File

@@ -131,9 +131,10 @@ struct CreateCommandBufferRepeatedProperties : public BasicCommandBufferTest
if (BasicCommandBufferTest::Skip()) return true; if (BasicCommandBufferTest::Skip()) return true;
bool skip = true; bool skip = true;
if (simultaneous_use_support) if (is_extension_available(
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
{ {
rep_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR; rep_prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
skip = false; skip = false;
} }
else if (is_extension_available( else if (is_extension_available(
@@ -142,13 +143,6 @@ struct CreateCommandBufferRepeatedProperties : public BasicCommandBufferTest
rep_prop = CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR; rep_prop = CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
skip = false; skip = false;
} }
else if (is_extension_available(
device,
CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
{
rep_prop = CL_COMMAND_BUFFER_MUTABLE_KHR;
skip = false;
}
return skip; return skip;
} }
@@ -185,7 +179,9 @@ struct CreateCommandBufferNotSupportedProperties : public BasicCommandBufferTest
if (BasicCommandBufferTest::Skip()) return true; if (BasicCommandBufferTest::Skip()) return true;
bool skip = true; bool skip = true;
if (!simultaneous_use_support) if (is_extension_available(
device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME)
&& !simultaneous_use_support)
{ {
unsupported_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR; unsupported_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
skip = false; skip = false;

View File

@@ -66,102 +66,6 @@ struct EnqueueCommandBufferNotFinalized : public BasicCommandBufferTest
} }
}; };
// CL_INVALID_OPERATION if command_buffer was not created with the
// CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR flag and is in the Pending state.
struct EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState
: public BasicCommandBufferTest
{
EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState(
cl_device_id device, cl_context context, cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), user_event(nullptr)
{}
cl_int Run() override
{
cl_int error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_failure_error_ret(error, CL_INVALID_OPERATION,
"clEnqueueCommandBufferKHR should return "
"CL_INVALID_OPERATION",
TEST_FAIL);
error = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
clFinish(queue);
return CL_SUCCESS;
}
cl_int SetUp(int elements) override
{
auto verify_state = [&](const cl_command_buffer_state_khr &expected) {
cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0);
cl_int error = clGetCommandBufferInfoKHR(
command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state),
&state, nullptr);
test_error_ret(error, "clGetCommandBufferInfoKHR failed",
TEST_FAIL);
test_assert_error(
state == expected,
"Unexpected result of CL_COMMAND_BUFFER_STATE_KHR query!");
return TEST_PASS;
};
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp failed");
command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
test_error(error, "clCreateCommandBufferKHR failed");
error = RecordCommandBuffer();
test_error(error, "RecordCommandBuffer failed");
error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
test_error(error, "State is not Executable");
error = EnqueueCommandBuffer();
test_error(error, "EnqueueCommandBuffer failed");
return CL_SUCCESS;
}
cl_int RecordCommandBuffer()
{
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
return CL_SUCCESS;
}
cl_int EnqueueCommandBuffer()
{
cl_int pattern = 0xE;
cl_int error =
clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&user_event, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
return CL_SUCCESS;
}
clEventWrapper user_event;
};
// CL_INVALID_VALUE if queues is NULL and num_queues is > 0, or queues is not // CL_INVALID_VALUE if queues is NULL and num_queues is > 0, or queues is not
// NULL and num_queues is 0. // NULL and num_queues is 0.
struct EnqueueCommandBufferNullQueuesNumQueues : public BasicCommandBufferTest struct EnqueueCommandBufferNullQueuesNumQueues : public BasicCommandBufferTest
@@ -623,14 +527,6 @@ REGISTER_TEST(negative_enqueue_command_buffer_not_finalized)
device, context, queue, num_elements); device, context, queue, num_elements);
} }
REGISTER_TEST(
negative_enqueue_command_buffer_without_simultaneous_no_pending_state)
{
return MakeAndRunTest<
EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState>(
device, context, queue, num_elements);
}
REGISTER_TEST(negative_enqueue_command_buffer_null_queues_num_queues) REGISTER_TEST(negative_enqueue_command_buffer_null_queues_num_queues)
{ {
return MakeAndRunTest<EnqueueCommandBufferNullQueuesNumQueues>( return MakeAndRunTest<EnqueueCommandBufferNullQueuesNumQueues>(

View File

@@ -44,20 +44,9 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
FinalizeCommandBufferNotRecordingState(cl_device_id device, FinalizeCommandBufferNotRecordingState(cl_device_id device,
cl_context context, cl_context context,
cl_command_queue queue) cl_command_queue queue)
: BasicCommandBufferTest(device, context, queue), user_event(nullptr) : BasicCommandBufferTest(device, context, queue)
{} {}
cl_int SetUp(int elements) override
{
cl_int error = BasicCommandBufferTest::SetUp(elements);
test_error(error, "BasicCommandBufferTest::SetUp failed");
user_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
return CL_SUCCESS;
}
cl_int Run() override cl_int Run() override
{ {
auto verify_state = [&](const cl_command_buffer_state_khr &expected) { auto verify_state = [&](const cl_command_buffer_state_khr &expected) {
@@ -87,18 +76,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
"CL_INVALID_OPERATION", "CL_INVALID_OPERATION",
TEST_FAIL); TEST_FAIL);
error = EnqueueCommandBuffer();
test_error(error, "EnqueueCommandBuffer failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_failure_error_ret(error, CL_INVALID_OPERATION,
"clFinalizeCommandBufferKHR should return "
"CL_INVALID_OPERATION",
TEST_FAIL);
clSetUserEventStatus(user_event, CL_COMPLETE);
clFinish(queue);
return CL_SUCCESS; return CL_SUCCESS;
} }
@@ -114,22 +91,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
return CL_SUCCESS; return CL_SUCCESS;
} }
cl_int EnqueueCommandBuffer()
{
cl_int pattern = 0xE;
cl_int error =
clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
&user_event, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
return CL_SUCCESS;
}
clEventWrapper user_event;
}; };
}; };

View File

@@ -398,16 +398,17 @@ struct CommandNDRangeKernelWithKernelEnqueueCall : public BasicCommandBufferTest
const char* kernel_str = const char* kernel_str =
R"( R"(
__kernel void enqueue_call_func() { __kernel void enqueue_call_func(__global int* out_mem) {
} out_mem[get_global_id(0)] = 0x1234;
}
__kernel void enqueue_call_kernel() {
queue_t def_q = get_default_queue(); __kernel void enqueue_call_kernel(__global int* out_mem) {
ndrange_t ndrange = ndrange_1D(1); queue_t def_q = get_default_queue();
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, ndrange_t ndrange = ndrange_1D(1);
^{enqueue_call_func();}); enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
} ^{enqueue_call_func(out_mem);});
)"; }
)";
std::string build_options = std::string(" ") + cl_std; std::string build_options = std::string(" ") + cl_std;
error = create_single_kernel_helper(context, &program, &kernel, 1, error = create_single_kernel_helper(context, &program, &kernel, 1,
@@ -443,7 +444,17 @@ enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
cl_int Run() override cl_int Run() override
{ {
cl_int error = clCommandNDRangeKernelKHR( cl_int error = CL_SUCCESS;
clMemWrapper out_mem =
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
num_elements * sizeof(cl_int), nullptr, &error);
test_error(error, "clCreateBuffer failed");
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &out_mem);
test_error(error, "clSetKernelArg failed");
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr); nullptr, 0, nullptr, nullptr, nullptr);

View File

@@ -15,6 +15,7 @@
// //
#include "debug_ahb.h" #include "debug_ahb.h"
#include "harness/errorHelpers.h"
constexpr AHardwareBuffer_UsageFlags flag_list[] = { constexpr AHardwareBuffer_UsageFlags flag_list[] = {
AHARDWAREBUFFER_USAGE_CPU_READ_RARELY, AHARDWAREBUFFER_USAGE_CPU_READ_RARELY,
@@ -191,3 +192,27 @@ std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format)
} }
return result; return result;
} }
AHardwareBuffer *create_AHB(AHardwareBuffer_Desc *desc)
{
AHardwareBuffer *buffer_ptr = nullptr;
int err = AHardwareBuffer_allocate(desc, &buffer_ptr);
if (err != 0)
{
throw std::runtime_error("AHardwareBuffer_allocate failed with code: "
+ std::to_string(err) + "\n");
}
return buffer_ptr;
}
void log_unsupported_ahb_format(AHardwareBuffer_Desc aHardwareBufferDesc)
{
std::string usage_string = ahardwareBufferDecodeUsageFlagsToString(
static_cast<AHardwareBuffer_UsageFlags>(aHardwareBufferDesc.usage));
log_info("Unsupported format %s:\n Usage flags %s\n Size (%u, %u, "
"layers = %u)\n",
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
aHardwareBufferDesc.format))
.c_str(),
usage_string.c_str(), aHardwareBufferDesc.width,
aHardwareBufferDesc.height, aHardwareBufferDesc.layers);
}

View File

@@ -19,6 +19,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <numeric> #include <numeric>
#include <CL/cl.h>
#define CHECK_AHARDWARE_BUFFER_SUPPORT(ahardwareBuffer_Desc, format) \ #define CHECK_AHARDWARE_BUFFER_SUPPORT(ahardwareBuffer_Desc, format) \
if (!AHardwareBuffer_isSupported(&ahardwareBuffer_Desc)) \ if (!AHardwareBuffer_isSupported(&ahardwareBuffer_Desc)) \
@@ -39,4 +40,91 @@
std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format); std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format);
std::string ahardwareBufferUsageFlagToString(AHardwareBuffer_UsageFlags flag); std::string ahardwareBufferUsageFlagToString(AHardwareBuffer_UsageFlags flag);
std::string std::string
ahardwareBufferDecodeUsageFlagsToString(AHardwareBuffer_UsageFlags flags); ahardwareBufferDecodeUsageFlagsToString(AHardwareBuffer_UsageFlags flags);
AHardwareBuffer* create_AHB(AHardwareBuffer_Desc* desc);
void log_unsupported_ahb_format(AHardwareBuffer_Desc desc);
struct AHardwareBufferWrapper
{
AHardwareBuffer* m_ahb;
AHardwareBufferWrapper(): m_ahb(nullptr) {}
AHardwareBufferWrapper(AHardwareBuffer* ahb) { m_ahb = ahb; }
AHardwareBufferWrapper(AHardwareBuffer_Desc* desc)
{
m_ahb = create_AHB(desc);
}
AHardwareBufferWrapper& operator=(AHardwareBuffer* rhs)
{
release();
m_ahb = rhs;
return *this;
}
~AHardwareBufferWrapper() { release(); }
// Copy constructor
AHardwareBufferWrapper(AHardwareBufferWrapper const& ahbw)
: m_ahb(ahbw.m_ahb)
{
retain();
}
// Copy assignment operator
AHardwareBufferWrapper& operator=(AHardwareBufferWrapper const& rhs)
{
release();
m_ahb = rhs.m_ahb;
retain();
return *this;
}
// Move constructor
AHardwareBufferWrapper(AHardwareBufferWrapper&& ahbw)
{
m_ahb = ahbw.m_ahb;
ahbw.m_ahb = nullptr;
}
// Move assignment operator
AHardwareBufferWrapper& operator=(AHardwareBufferWrapper&& rhs)
{
if (this != &rhs)
{
release(); // Giving up current reference
m_ahb = rhs.m_ahb;
rhs.m_ahb = nullptr;
}
return *this;
}
void retain()
{
if (nullptr != m_ahb)
{
AHardwareBuffer_acquire(m_ahb);
}
}
void release()
{
if (nullptr != m_ahb)
{
AHardwareBuffer_release(m_ahb);
}
}
// Usage operators
operator AHardwareBuffer*() { return m_ahb; }
cl_mem_properties get_props()
{
return reinterpret_cast<cl_mem_properties>(m_ahb);
}
};

View File

@@ -97,7 +97,7 @@ static const char *diff_images_kernel_source = {
}; };
// Checks that the inferred image format is correct // Checks that the inferred image format is correct
REGISTER_TEST(test_images) REGISTER_TEST(images)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
@@ -134,19 +134,15 @@ REGISTER_TEST(test_images)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, log_info(
&aHardwareBuffer); "Testing %s\n",
if (ahb_result != 0) ahardwareBufferFormatToString(format.aHardwareBufferFormat)
{ .c_str());
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
const cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 aHardwareBuffer.get_props(), 0
}; };
cl_mem image = clCreateImageWithProperties( cl_mem image = clCreateImageWithProperties(
@@ -181,8 +177,6 @@ REGISTER_TEST(test_images)
test_error(clReleaseMemObject(image), test_error(clReleaseMemObject(image),
"Failed to release image"); "Failed to release image");
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
} }
} }
} }
@@ -190,7 +184,7 @@ REGISTER_TEST(test_images)
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST(test_images_read) REGISTER_TEST(images_read)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed); RandomSeed seed(gRandomSeed);
@@ -238,15 +232,11 @@ REGISTER_TEST(test_images_read)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, log_info(
&aHardwareBuffer); "Testing %s\n",
if (ahb_result != 0) ahardwareBufferFormatToString(format.aHardwareBufferFormat)
{ .c_str());
log_error("AHardwareBuffer_allocate failed with code %d\n",
ahb_result);
return TEST_FAIL;
}
// Determine AHB memory layout // Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {}; AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -279,7 +269,7 @@ REGISTER_TEST(test_images_read)
generate_random_image_data(&imageInfo, srcData, seed); generate_random_image_data(&imageInfo, srcData, seed);
void *hardware_buffer_data = nullptr; void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock( int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
nullptr, &hardware_buffer_data); nullptr, &hardware_buffer_data);
if (ahb_result != 0) if (ahb_result != 0)
@@ -301,7 +291,7 @@ REGISTER_TEST(test_images_read)
cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 aHardwareBuffer.get_props(), 0
}; };
clMemWrapper imported_image = clCreateImageWithProperties( clMemWrapper imported_image = clCreateImageWithProperties(
@@ -394,7 +384,7 @@ REGISTER_TEST(test_images_read)
test_error(err, "clEnqueueNDRangeKernel failed"); test_error(err, "clEnqueueNDRangeKernel failed");
err = clEnqueueReleaseExternalMemObjectsKHR( err = clEnqueueReleaseExternalMemObjectsKHR(
queue, 1, &opencl_image, 0, nullptr, nullptr); queue, 1, &imported_image, 0, nullptr, nullptr);
test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed"); test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed");
// Read buffer and verify // Read buffer and verify
@@ -482,9 +472,6 @@ REGISTER_TEST(test_images_read)
} }
} }
} }
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
} }
} }
} }
@@ -492,7 +479,7 @@ REGISTER_TEST(test_images_read)
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST(test_enqueue_read_image) REGISTER_TEST(enqueue_read_image)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed); RandomSeed seed(gRandomSeed);
@@ -540,15 +527,12 @@ REGISTER_TEST(test_enqueue_read_image)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer); log_info(
if (ahb_result != 0) "Testing %s\n",
{ ahardwareBufferFormatToString(format.aHardwareBufferFormat)
log_error("AHardwareBuffer_allocate failed with code %d\n", .c_str());
ahb_result);
return TEST_FAIL;
}
// Determine AHB memory layout // Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {}; AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -581,7 +565,7 @@ REGISTER_TEST(test_enqueue_read_image)
generate_random_image_data(&imageInfo, srcData, seed); generate_random_image_data(&imageInfo, srcData, seed);
void *hardware_buffer_data = nullptr; void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock( int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
nullptr, &hardware_buffer_data); nullptr, &hardware_buffer_data);
if (ahb_result != 0) if (ahb_result != 0)
@@ -601,9 +585,9 @@ REGISTER_TEST(test_enqueue_read_image)
return TEST_FAIL; return TEST_FAIL;
} }
const cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 aHardwareBuffer.get_props(), 0
}; };
clMemWrapper imported_image = clCreateImageWithProperties( clMemWrapper imported_image = clCreateImageWithProperties(
@@ -662,9 +646,6 @@ REGISTER_TEST(test_enqueue_read_image)
out_image_ptr += imageInfo.rowPitch; out_image_ptr += imageInfo.rowPitch;
} }
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
if (total_matched == 0) if (total_matched == 0)
{ {
test_fail("Zero bytes matched"); test_fail("Zero bytes matched");
@@ -676,7 +657,7 @@ REGISTER_TEST(test_enqueue_read_image)
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST(test_enqueue_copy_image) REGISTER_TEST(enqueue_copy_image)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed); RandomSeed seed(gRandomSeed);
@@ -724,15 +705,12 @@ REGISTER_TEST(test_enqueue_copy_image)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer); log_info(
if (ahb_result != 0) "Testing %s\n",
{ ahardwareBufferFormatToString(format.aHardwareBufferFormat)
log_error("AHardwareBuffer_allocate failed with code %d\n", .c_str());
ahb_result);
return TEST_FAIL;
}
// Determine AHB memory layout // Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {}; AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -765,7 +743,7 @@ REGISTER_TEST(test_enqueue_copy_image)
generate_random_image_data(&imageInfo, srcData, seed); generate_random_image_data(&imageInfo, srcData, seed);
void *hardware_buffer_data = nullptr; void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock( int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
nullptr, &hardware_buffer_data); nullptr, &hardware_buffer_data);
if (ahb_result != 0) if (ahb_result != 0)
@@ -787,7 +765,7 @@ REGISTER_TEST(test_enqueue_copy_image)
cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 aHardwareBuffer.get_props(), 0
}; };
clMemWrapper imported_image = clCreateImageWithProperties( clMemWrapper imported_image = clCreateImageWithProperties(
@@ -975,9 +953,6 @@ REGISTER_TEST(test_enqueue_copy_image)
} }
} }
} }
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
} }
} }
} }
@@ -985,7 +960,7 @@ REGISTER_TEST(test_enqueue_copy_image)
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST(test_enqueue_copy_image_to_buffer) REGISTER_TEST(enqueue_copy_image_to_buffer)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed); RandomSeed seed(gRandomSeed);
@@ -1033,15 +1008,12 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer); log_info(
if (ahb_result != 0) "Testing %s\n",
{ ahardwareBufferFormatToString(format.aHardwareBufferFormat)
log_error("AHardwareBuffer_allocate failed with code %d\n", .c_str());
ahb_result);
return TEST_FAIL;
}
// Determine AHB memory layout // Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {}; AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -1074,7 +1046,7 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
generate_random_image_data(&imageInfo, srcData, seed); generate_random_image_data(&imageInfo, srcData, seed);
void *hardware_buffer_data = nullptr; void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock( int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
nullptr, &hardware_buffer_data); nullptr, &hardware_buffer_data);
if (ahb_result != 0) if (ahb_result != 0)
@@ -1096,7 +1068,7 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 aHardwareBuffer.get_props(), 0
}; };
clMemWrapper imported_image = clCreateImageWithProperties( clMemWrapper imported_image = clCreateImageWithProperties(
@@ -1165,9 +1137,6 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
out_buffer_ptr += scanlineSize; out_buffer_ptr += scanlineSize;
} }
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
if (total_matched == 0) if (total_matched == 0)
{ {
test_fail("Zero bytes matched"); test_fail("Zero bytes matched");
@@ -1179,7 +1148,7 @@ REGISTER_TEST(test_enqueue_copy_image_to_buffer)
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST(test_enqueue_copy_buffer_to_image) REGISTER_TEST(enqueue_copy_buffer_to_image)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed); RandomSeed seed(gRandomSeed);
@@ -1227,15 +1196,12 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer); log_info(
if (ahb_result != 0) "Testing %s\n",
{ ahardwareBufferFormatToString(format.aHardwareBufferFormat)
log_error("AHardwareBuffer_allocate failed with code %d\n", .c_str());
ahb_result);
return TEST_FAIL;
}
// Determine AHB memory layout // Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {}; AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -1275,7 +1241,7 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 aHardwareBuffer.get_props(), 0
}; };
clMemWrapper imported_image = clCreateImageWithProperties( clMemWrapper imported_image = clCreateImageWithProperties(
@@ -1307,7 +1273,7 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
&hardware_buffer_desc); &hardware_buffer_desc);
void *hardware_buffer_data = nullptr; void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock( int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1,
nullptr, &hardware_buffer_data); nullptr, &hardware_buffer_data);
if (ahb_result != 0) if (ahb_result != 0)
@@ -1366,9 +1332,6 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
return TEST_FAIL; return TEST_FAIL;
} }
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
if (total_matched == 0) if (total_matched == 0)
{ {
test_fail("Zero bytes matched"); test_fail("Zero bytes matched");
@@ -1380,7 +1343,7 @@ REGISTER_TEST(test_enqueue_copy_buffer_to_image)
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST(test_enqueue_write_image) REGISTER_TEST(enqueue_write_image)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed); RandomSeed seed(gRandomSeed);
@@ -1428,15 +1391,12 @@ REGISTER_TEST(test_enqueue_write_image)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer); log_info(
if (ahb_result != 0) "Testing %s\n",
{ ahardwareBufferFormatToString(format.aHardwareBufferFormat)
log_error("AHardwareBuffer_allocate failed with code %d\n", .c_str());
ahb_result);
return TEST_FAIL;
}
// Determine AHB memory layout // Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {}; AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -1453,7 +1413,7 @@ REGISTER_TEST(test_enqueue_write_image)
cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 aHardwareBuffer.get_props(), 0
}; };
clMemWrapper imported_image = clCreateImageWithProperties( clMemWrapper imported_image = clCreateImageWithProperties(
@@ -1503,7 +1463,7 @@ REGISTER_TEST(test_enqueue_write_image)
&hardware_buffer_desc); &hardware_buffer_desc);
void *hardware_buffer_data = nullptr; void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock( int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1,
nullptr, &hardware_buffer_data); nullptr, &hardware_buffer_data);
if (ahb_result != 0) if (ahb_result != 0)
@@ -1564,9 +1524,6 @@ REGISTER_TEST(test_enqueue_write_image)
return TEST_FAIL; return TEST_FAIL;
} }
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
if (total_matched == 0) if (total_matched == 0)
{ {
test_fail("Zero bytes matched"); test_fail("Zero bytes matched");
@@ -1578,7 +1535,7 @@ REGISTER_TEST(test_enqueue_write_image)
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST(test_enqueue_fill_image) REGISTER_TEST(enqueue_fill_image)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
RandomSeed seed(gRandomSeed); RandomSeed seed(gRandomSeed);
@@ -1626,15 +1583,12 @@ REGISTER_TEST(test_enqueue_fill_image)
CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format);
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc,
&aHardwareBuffer); log_info(
if (ahb_result != 0) "Testing %s\n",
{ ahardwareBufferFormatToString(format.aHardwareBufferFormat)
log_error("AHardwareBuffer_allocate failed with code %d\n", .c_str());
ahb_result);
return TEST_FAIL;
}
// Determine AHB memory layout // Determine AHB memory layout
AHardwareBuffer_Desc hardware_buffer_desc = {}; AHardwareBuffer_Desc hardware_buffer_desc = {};
@@ -1650,7 +1604,7 @@ REGISTER_TEST(test_enqueue_fill_image)
cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 aHardwareBuffer.get_props(), 0
}; };
clMemWrapper imported_image = clCreateImageWithProperties( clMemWrapper imported_image = clCreateImageWithProperties(
@@ -1739,7 +1693,7 @@ REGISTER_TEST(test_enqueue_fill_image)
&hardware_buffer_desc); &hardware_buffer_desc);
void *hardware_buffer_data = nullptr; void *hardware_buffer_data = nullptr;
ahb_result = AHardwareBuffer_lock( int ahb_result = AHardwareBuffer_lock(
aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1,
nullptr, &hardware_buffer_data); nullptr, &hardware_buffer_data);
if (ahb_result != 0) if (ahb_result != 0)
@@ -1819,8 +1773,6 @@ REGISTER_TEST(test_enqueue_fill_image)
return TEST_FAIL; return TEST_FAIL;
} }
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
free(verificationLine); free(verificationLine);
if (total_matched == 0) if (total_matched == 0)
@@ -1834,7 +1786,7 @@ REGISTER_TEST(test_enqueue_fill_image)
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST(test_blob) REGISTER_TEST(blob)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
@@ -1883,19 +1835,17 @@ REGISTER_TEST(test_blob)
continue; continue;
} }
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
int ahb_result =
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); log_info(
if (ahb_result != 0) "Testing %s\n",
{ ahardwareBufferFormatToString(
log_error("AHardwareBuffer_allocate failed with code %d\n", static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
ahb_result); .c_str());
return TEST_FAIL;
}
cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 aHardwareBuffer.get_props(), 0
}; };
cl_mem buffer = clCreateBufferWithProperties( cl_mem buffer = clCreateBufferWithProperties(
@@ -1903,8 +1853,6 @@ REGISTER_TEST(test_blob)
test_error(err, "Failed to create CL buffer from AHardwareBuffer"); test_error(err, "Failed to create CL buffer from AHardwareBuffer");
test_error(clReleaseMemObject(buffer), "Failed to release buffer"); test_error(clReleaseMemObject(buffer), "Failed to release buffer");
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
} }
return TEST_PASS; return TEST_PASS;

View File

@@ -1,246 +1,398 @@
// //
// Copyright (c) 2025 The Khronos Group Inc. // Copyright (c) 2025 The Khronos Group Inc.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
#include "harness/compat.h" #include "harness/compat.h"
#include "harness/kernelHelpers.h" #include "harness/kernelHelpers.h"
#include "harness/imageHelpers.h" #include "harness/imageHelpers.h"
#include "harness/errorHelpers.h" #include "harness/errorHelpers.h"
#include <android/hardware_buffer.h> #include <android/hardware_buffer.h>
#include "debug_ahb.h" #include "debug_ahb.h"
REGISTER_TEST(test_buffer_format_negative) REGISTER_TEST(buffer_format_negative)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
if (!is_extension_available(device, "cl_khr_external_memory")) if (!is_extension_available(device, "cl_khr_external_memory"))
{ {
log_info("cl_khr_external_memory is not supported on this platform. " log_info("cl_khr_external_memory is not supported on this platform. "
"Skipping test.\n"); "Skipping test.\n");
return TEST_SKIPPED_ITSELF; return TEST_SKIPPED_ITSELF;
} }
if (!is_extension_available( if (!is_extension_available(
device, "cl_khr_external_memory_android_hardware_buffer")) device, "cl_khr_external_memory_android_hardware_buffer"))
{ {
log_info("cl_khr_external_memory_android_hardware_buffer is not " log_info("cl_khr_external_memory_android_hardware_buffer is not "
"supported on this platform. " "supported on this platform. "
"Skipping test.\n"); "Skipping test.\n");
return TEST_SKIPPED_ITSELF; return TEST_SKIPPED_ITSELF;
} }
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM; aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
aHardwareBufferDesc.width = 64; aHardwareBufferDesc.width = 64;
aHardwareBufferDesc.height = 1; aHardwareBufferDesc.height = 1;
aHardwareBufferDesc.layers = 1; aHardwareBufferDesc.layers = 1;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
{ {
const std::string usage_string = const std::string usage_string =
ahardwareBufferDecodeUsageFlagsToString( ahardwareBufferDecodeUsageFlagsToString(
static_cast<AHardwareBuffer_UsageFlags>( static_cast<AHardwareBuffer_UsageFlags>(
aHardwareBufferDesc.usage)); aHardwareBufferDesc.usage));
log_info( log_info(
"Unsupported format %s, usage flags %s\n", "Unsupported format %s, usage flags %s\n",
ahardwareBufferFormatToString( ahardwareBufferFormatToString(
static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format)) static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
.c_str(), .c_str(),
usage_string.c_str()); usage_string.c_str());
return TEST_SKIPPED_ITSELF; return TEST_SKIPPED_ITSELF;
} }
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBuffer *aHardwareBuffer = nullptr;
const int ahb_result = const int ahb_result =
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
if (ahb_result != 0) if (ahb_result != 0)
{ {
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result); log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
return TEST_FAIL; return TEST_FAIL;
} }
log_info("Testing %s\n", log_info("Testing %s\n",
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>( ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
aHardwareBufferDesc.format)) aHardwareBufferDesc.format))
.c_str()); .c_str());
cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
}; };
cl_mem buffer = clCreateBufferWithProperties( cl_mem buffer = clCreateBufferWithProperties(
context, props, CL_MEM_READ_WRITE, 0, nullptr, &err); context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
test_assert_error(err == CL_INVALID_OPERATION, test_assert_error(err == CL_INVALID_OPERATION,
"To create a buffer the aHardwareFormat must be " "To create a buffer the aHardwareFormat must be "
"AHARDWAREBUFFER_FORMAT_BLOB"); "AHARDWAREBUFFER_FORMAT_BLOB");
if (buffer != nullptr) if (buffer != nullptr)
{ {
test_error(clReleaseMemObject(buffer), "Failed to release buffer"); test_error(clReleaseMemObject(buffer), "Failed to release buffer");
} }
AHardwareBuffer_release(aHardwareBuffer); AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr; aHardwareBuffer = nullptr;
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST(test_buffer_size_negative) REGISTER_TEST(buffer_size_negative)
{ {
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
constexpr size_t buffer_size = 64; constexpr size_t buffer_size = 64;
if (!is_extension_available(device, "cl_khr_external_memory")) if (!is_extension_available(device, "cl_khr_external_memory"))
{ {
log_info("cl_khr_external_memory is not supported on this platform. " log_info("cl_khr_external_memory is not supported on this platform. "
"Skipping test.\n"); "Skipping test.\n");
return TEST_SKIPPED_ITSELF; return TEST_SKIPPED_ITSELF;
} }
if (!is_extension_available( if (!is_extension_available(
device, "cl_khr_external_memory_android_hardware_buffer")) device, "cl_khr_external_memory_android_hardware_buffer"))
{ {
log_info("cl_khr_external_memory_android_hardware_buffer is not " log_info("cl_khr_external_memory_android_hardware_buffer is not "
"supported on this platform. " "supported on this platform. "
"Skipping test.\n"); "Skipping test.\n");
return TEST_SKIPPED_ITSELF; return TEST_SKIPPED_ITSELF;
} }
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB; aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
aHardwareBufferDesc.width = buffer_size; aHardwareBufferDesc.width = buffer_size;
aHardwareBufferDesc.height = 1; aHardwareBufferDesc.height = 1;
aHardwareBufferDesc.layers = 1; aHardwareBufferDesc.layers = 1;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
{ {
const std::string usage_string = const std::string usage_string =
ahardwareBufferDecodeUsageFlagsToString( ahardwareBufferDecodeUsageFlagsToString(
static_cast<AHardwareBuffer_UsageFlags>( static_cast<AHardwareBuffer_UsageFlags>(
aHardwareBufferDesc.usage)); aHardwareBufferDesc.usage));
log_info( log_info(
"Unsupported format %s, usage flags %s\n", "Unsupported format %s, usage flags %s\n",
ahardwareBufferFormatToString( ahardwareBufferFormatToString(
static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format)) static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
.c_str(), .c_str(),
usage_string.c_str()); usage_string.c_str());
return TEST_SKIPPED_ITSELF; return TEST_SKIPPED_ITSELF;
} }
AHardwareBuffer *aHardwareBuffer = nullptr; AHardwareBuffer *aHardwareBuffer = nullptr;
const int ahb_result = const int ahb_result =
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
if (ahb_result != 0) if (ahb_result != 0)
{ {
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result); log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
return TEST_FAIL; return TEST_FAIL;
} }
log_info("Testing %s\n", log_info("Testing %s\n",
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>( ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
aHardwareBufferDesc.format)) aHardwareBufferDesc.format))
.c_str()); .c_str());
cl_mem_properties props[] = { cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0 reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
}; };
cl_mem buffer = clCreateBufferWithProperties( cl_mem buffer = clCreateBufferWithProperties(
context, props, CL_MEM_READ_WRITE, buffer_size / 2, nullptr, &err); context, props, CL_MEM_READ_WRITE, buffer_size / 2, nullptr, &err);
test_assert_error(err == CL_INVALID_BUFFER_SIZE, test_assert_error(err == CL_INVALID_BUFFER_SIZE,
"Wrong error value returned"); "Wrong error value returned");
if (buffer != nullptr) if (buffer != nullptr)
{ {
test_error(clReleaseMemObject(buffer), "Failed to release buffer"); test_error(clReleaseMemObject(buffer), "Failed to release buffer");
} }
AHardwareBuffer_release(aHardwareBuffer); AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr; aHardwareBuffer = nullptr;
return TEST_PASS; return TEST_PASS;
} }
// Negative test: when importing an AHardwareBuffer as a CL image, both the
// image format and the image descriptor are implied by the AHardwareBuffer,
// so passing either explicitly must fail.
REGISTER_TEST(images_negative)
{
    cl_int err = CL_SUCCESS;

    if (!is_extension_available(device, "cl_khr_external_memory"))
    {
        log_info("cl_khr_external_memory is not supported on this platform. "
                 "Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }

    if (!is_extension_available(
            device, "cl_khr_external_memory_android_hardware_buffer"))
    {
        log_info("cl_khr_external_memory_android_hardware_buffer is not "
                 "supported on this platform. "
                 "Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }

    // Describe a 64x64 RGBA8 AHardwareBuffer usable for CPU access,
    // sampling, and rendering.
    AHardwareBuffer_Desc ahb_desc = { 0 };
    ahb_desc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
    ahb_desc.usage = static_cast<AHardwareBuffer_UsageFlags>(
        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
        | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
        | AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE
        | AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER);
    ahb_desc.width = 64;
    ahb_desc.height = 64;
    ahb_desc.layers = 1;

    AHardwareBuffer *ahb = nullptr;
    const int ahb_result = AHardwareBuffer_allocate(&ahb_desc, &ahb);
    if (ahb_result != 0)
    {
        log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
        return TEST_FAIL;
    }

    const cl_mem_properties props[] = {
        CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
        reinterpret_cast<cl_mem_properties>(ahb), 0
    };

    // A non-NULL image format must be rejected: the format comes from the
    // AHardwareBuffer itself.
    constexpr cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 };
    cl_mem image =
        clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE,
                                    &image_format, nullptr, nullptr, &err);
    test_assert_error(err == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
                      "Wrong error value returned");
    if (image != nullptr)
    {
        test_error(clReleaseMemObject(image), "Failed to release image");
    }

    // A non-NULL image descriptor must be rejected for the same reason.
    constexpr cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, 64, 64 };
    image = clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE,
                                        nullptr, &image_desc, nullptr, &err);
    test_assert_error(err == CL_INVALID_IMAGE_DESCRIPTOR,
                      "Wrong error value returned");
    if (image != nullptr)
    {
        test_error(clReleaseMemObject(image), "Failed to release image");
    }

    AHardwareBuffer_release(ahb);
    ahb = nullptr;
    return TEST_PASS;
}
// Negative test: exercise the error conditions mandated by
// cl_khr_external_memory_android_hardware_buffer for both
// clCreateBufferWithProperties (buffer-backed AHB) and
// clCreateImageWithProperties (image-backed AHB).
REGISTER_TEST(invalid_arguments)
{
    cl_int err;
    constexpr cl_uint buffer_size = 4096;
    if (!is_extension_available(
            device, "cl_khr_external_memory_android_hardware_buffer"))
    {
        log_info("cl_khr_external_memory_android_hardware_buffer is not "
                 "supported on this platform. Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }
    // BLOB-format AHardwareBuffer, importable as a CL buffer.
    AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
    aHardwareBufferDesc.width = buffer_size;
    aHardwareBufferDesc.height = 1;
    aHardwareBufferDesc.layers = 1;
    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
    aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
        | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
    if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
    {
        log_unsupported_ahb_format(aHardwareBufferDesc);
        return TEST_SKIPPED_ITSELF;
    }
    AHardwareBufferWrapper ahb_buffer(&aHardwareBufferDesc);
    // RGBA8 AHardwareBuffer, importable as a CL image.
    aHardwareBufferDesc.width = 64;
    aHardwareBufferDesc.height = 64;
    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
    if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
    {
        log_unsupported_ahb_format(aHardwareBufferDesc);
        return TEST_SKIPPED_ITSELF;
    }
    AHardwareBufferWrapper ahb_image(&aHardwareBufferDesc);
    const cl_mem_properties props_buffer[] = {
        CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
        ahb_buffer.get_props(),
        0,
    };
    const cl_mem_properties props_image[] = {
        CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
        ahb_image.get_props(),
        0,
    };
    // stub values
    cl_image_format image_format = { 0 };
    cl_image_desc image_desc = { 0 };
    int host_data = 0;
    void *host_ptr = reinterpret_cast<void *>(&host_data);
    log_info("Testing buffer error conditions\n");
    // Buffer error conditions
    // Requesting a size larger than the AHardwareBuffer must fail.
    clMemWrapper mem =
        clCreateBufferWithProperties(context, props_buffer, CL_MEM_READ_WRITE,
                                     buffer_size + 1, nullptr, &err);
    if (CL_INVALID_BUFFER_SIZE != err)
    {
        log_error(
            "clCreateBufferWithProperties should return CL_INVALID_BUFFER_SIZE "
            "but returned %s: CL_INVALID_BUFFER_SIZE if size is non-zero and "
            "greater than the AHardwareBuffer when importing external memory "
            "using cl_khr_external_memory_android_hardware_buffer\n",
            IGetErrorString(err));
        return TEST_FAIL;
    }
    // Importing a non-BLOB AHardwareBuffer as a buffer must fail.
    mem = clCreateBufferWithProperties(context, props_image, CL_MEM_READ_WRITE,
                                       0, nullptr, &err);
    if (CL_INVALID_OPERATION != err)
    {
        log_error(
            "clCreateBufferWithProperties should return CL_INVALID_OPERATION "
            "but returned %s: CL_INVALID_OPERATION if the AHardwareBuffer "
            "format is not AHARDWAREBUFFER_FORMAT_BLOB\n",
            IGetErrorString(err));
        return TEST_FAIL;
    }
    // A host pointer is incompatible with imported external memory.
    mem = clCreateBufferWithProperties(context, props_buffer, CL_MEM_READ_WRITE,
                                       0, host_ptr, &err);
    if (CL_INVALID_HOST_PTR != err)
    {
        log_error(
            "clCreateBufferWithProperties should return CL_INVALID_HOST_PTR "
            "but returned %s: CL_INVALID_HOST_PTR if host_ptr is not NULL\n",
            IGetErrorString(err));
        return TEST_FAIL;
    }
    log_info("Testing image error conditions\n");
    // Image error conditions
    // An explicit image format must be rejected; it is implied by the AHB.
    mem = clCreateImageWithProperties(context, props_image, CL_MEM_READ_WRITE,
                                      &image_format, nullptr, nullptr, &err);
    if (CL_INVALID_IMAGE_FORMAT_DESCRIPTOR != err)
    {
        log_error(
            "clCreateImageWithProperties should return "
            "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR but returned %s: "
            "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if image_format is not NULL "
            "when using cl_khr_external_memory_android_hardware_buffer.\n",
            IGetErrorString(err));
        return TEST_FAIL;
    }
    // An explicit image descriptor must be rejected; it is implied by the AHB.
    mem = clCreateImageWithProperties(context, props_image, CL_MEM_READ_WRITE,
                                      nullptr, &image_desc, nullptr, &err);
    if (CL_INVALID_IMAGE_DESCRIPTOR != err)
    {
        log_error("clCreateImageWithProperties should return "
                  "CL_INVALID_IMAGE_DESCRIPTOR but returned %s: "
                  "CL_INVALID_IMAGE_DESCRIPTOR if image_desc is not NULL when "
                  "using cl_khr_external_memory_android_hardware_buffer.\n",
                  IGetErrorString(err));
        return TEST_FAIL;
    }
    // Importing a BLOB AHardwareBuffer as an image must fail.
    mem = clCreateImageWithProperties(context, props_buffer, CL_MEM_READ_WRITE,
                                      nullptr, nullptr, nullptr, &err);
    if (CL_IMAGE_FORMAT_NOT_SUPPORTED != err)
    {
        log_error("clCreateImageWithProperties should return "
                  "CL_IMAGE_FORMAT_NOT_SUPPORTED but returned %s: "
                  "CL_IMAGE_FORMAT_NOT_SUPPORTED if AHardwareBuffer's format "
                  "is not supported\n",
                  IGetErrorString(err));
        return TEST_FAIL;
    }
    // A host pointer is incompatible with imported external memory.
    mem = clCreateImageWithProperties(context, props_image, CL_MEM_READ_WRITE,
                                      nullptr, nullptr, host_ptr, &err);
    if (CL_INVALID_HOST_PTR != err)
    {
        log_error(
            "clCreateImageWithProperties should return CL_INVALID_HOST_PTR "
            "but returned %s: CL_INVALID_HOST_PTR if host_ptr is not NULL\n",
            IGetErrorString(err));
        return TEST_FAIL;
    }
    return TEST_PASS;
}

View File

@@ -0,0 +1,26 @@
# DX-fence external semaphore tests are Windows-only (D3D12).
if(WIN32)
    include_directories(${CLConform_SOURCE_DIR}/test_common/harness
                        ${CLConform_INCLUDE_DIR})

    link_directories(${CL_LIB_DIR})

    list(APPEND CLConform_LIBRARIES directx_wrapper)

    set(MODULE_NAME CL_KHR_EXTERNAL_SEMAPHORE_DX_FENCE)

    set(${MODULE_NAME}_SOURCES
        main.cpp
        test_external_semaphore_dx_fence.cpp
        test_external_semaphore_dx_fence_negative_wait_signal.cpp
        test_external_semaphore_dx_fence_queries.cpp
        test_external_semaphore_dx_fence_export.cpp
    )

    # Double expansion is required: ${MODULE_NAME}_SOURCES alone is the
    # literal string "CL_KHR_EXTERNAL_SEMAPHORE_DX_FENCE_SOURCES", so the
    # property would be set on a non-existent file instead of the sources.
    set_source_files_properties(
        ${${MODULE_NAME}_SOURCES}
        PROPERTIES LANGUAGE CXX)

    include_directories(${CMAKE_CURRENT_SOURCE_DIR})
    include_directories("../../common/directx_wrapper")

    # Shared CTS build logic (target creation, warnings, linking).
    include(../../CMakeCommon.txt)
endif()

View File

@@ -0,0 +1,22 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"

// Entry point: hand every test registered via REGISTER_TEST to the
// shared conformance test harness.
int main(int argc, const char *argv[])
{
    auto &registry = test_registry::getInstance();
    return runTestHarness(argc, argv, registry.num_tests(),
                          registry.definitions(), false, 0);
}

View File

@@ -0,0 +1,117 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#pragma once
#include "harness/typeWrappers.h"
#include "harness/extensionHelpers.h"
#include "harness/errorHelpers.h"
#include "directx_wrapper.hpp"
// RAII helper that imports a D3D12 fence into OpenCL as a cl_semaphore_khr.
// On destruction it releases the semaphore (if created) and closes the
// shared fence handle (if opened).
class CLDXSemaphoreWrapper {
public:
    CLDXSemaphoreWrapper(cl_device_id device, cl_context context,
                         ID3D12Device* dx_device)
        : device(device), context(context), dx_device(dx_device){};

    // Creates a shared handle for |fence| and imports it as an OpenCL
    // semaphore via CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR.
    // Returns CL_SUCCESS on success, a test failure code otherwise.
    int createSemaphoreFromFence(ID3D12Fence* fence)
    {
        cl_int errcode = CL_SUCCESS;
        GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
        const HRESULT hr = dx_device->CreateSharedHandle(
            fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
        test_error(FAILED(hr), "Failed to get shared handle from D3D12 fence");
        cl_semaphore_properties_khr sem_props[] = {
            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
            static_cast<cl_semaphore_properties_khr>(
                CL_SEMAPHORE_TYPE_BINARY_KHR),
            static_cast<cl_semaphore_properties_khr>(
                CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
            reinterpret_cast<cl_semaphore_properties_khr>(fence_handle), 0
        };
        semaphore =
            clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
        test_error(errcode, "Could not create semaphore");
        return CL_SUCCESS;
    }

    ~CLDXSemaphoreWrapper()
    {
        releaseSemaphore();
        if (fence_handle)
        {
            CloseHandle(fence_handle);
        }
    };

    const cl_semaphore_khr* operator&() const { return &semaphore; };
    cl_semaphore_khr operator*() const { return semaphore; };
    HANDLE getHandle() const { return fence_handle; };

private:
    // Default-initialize to null so the destructor is well-defined even when
    // createSemaphoreFromFence() was never called or failed part-way.
    cl_semaphore_khr semaphore = nullptr;
    // NOTE(review): |fence| is never assigned anywhere in this class;
    // consider storing the imported fence here or removing the member.
    ComPtr<ID3D12Fence> fence;
    HANDLE fence_handle = nullptr;
    cl_device_id device;
    cl_context context;
    ComPtr<ID3D12Device> dx_device;

    // Releases the OpenCL semaphore if one was created.
    int releaseSemaphore() const
    {
        GET_PFN(device, clReleaseSemaphoreKHR);
        if (semaphore)
        {
            clReleaseSemaphoreKHR(semaphore);
        }
        return CL_SUCCESS;
    }
};
// Returns true if |handle_type| appears in the device's list of supported
// semaphore import handle types (CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR),
// false otherwise or if either query fails.
// Note: the parameter is a *semaphore* handle type to match the queried list
// (the previous cl_external_memory_handle_type_khr was a typedef typo; both
// alias cl_uint, so callers are unaffected).
static bool
is_import_handle_available(cl_device_id device,
                           const cl_external_semaphore_handle_type_khr handle_type)
{
    int errcode = CL_SUCCESS;
    size_t import_types_size = 0;
    // First query: size in bytes of the supported-import-types list.
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, 0,
                        nullptr, &import_types_size);
    if (errcode != CL_SUCCESS)
    {
        log_error("Could not query import semaphore handle types");
        return false;
    }
    std::vector<cl_external_semaphore_handle_type_khr> import_types(
        import_types_size / sizeof(cl_external_semaphore_handle_type_khr));
    // Second query: the list itself.
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
                        import_types_size, import_types.data(), nullptr);
    if (errcode != CL_SUCCESS)
    {
        log_error("Could not query import semaphore handle types");
        return false;
    }
    return std::find(import_types.begin(), import_types.end(), handle_type)
        != import_types.end();
}

View File

@@ -0,0 +1,324 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "semaphore_dx_fence_base.h"
// A CL-side signal followed by a CL-side wait on a semaphore imported from
// a D3D12 fence must both complete successfully.
REGISTER_TEST(test_external_semaphores_signal_wait)
{
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    constexpr cl_semaphore_payload_khr payload = 1;
    int status = CL_SUCCESS;

    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
    clEventWrapper signal_event;
    status = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, &payload, 0,
                                          nullptr, &signal_event);
    test_error(status, "Failed to signal semaphore");

    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    clEventWrapper wait_event;
    status = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, &payload, 0,
                                        nullptr, &wait_event);
    test_error(status, "Failed to wait semaphore");

    test_error(clFinish(queue), "Could not finish queue");

    // Verify that the events completed.
    test_assert_event_complete(signal_event);
    test_assert_event_complete(wait_event);

    return TEST_PASS;
}
// A CL-side wait that is unblocked by a host-side (CPU) Signal() on the
// underlying D3D12 fence must complete successfully.
REGISTER_TEST(test_external_semaphores_signal_dx_cpu)
{
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    constexpr cl_semaphore_payload_khr payload = 1;

    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    clEventWrapper wait_event;
    const int status = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &payload, 0, nullptr, &wait_event);
    test_error(status, "Failed to call clEnqueueWaitSemaphoresKHR");

    // Unblock the wait from the host side of the D3D12 fence.
    log_info("Calling d3d12_fence->Signal()\n");
    const HRESULT hr = (*fence)->Signal(payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");

    test_error(clFinish(queue), "Could not finish queue");
    test_assert_event_complete(wait_event);

    return TEST_PASS;
}
// A CL-side wait that is unblocked by a D3D12 command-queue (GPU) Signal()
// on the underlying fence must complete successfully.
REGISTER_TEST(test_external_semaphores_signal_dx_gpu)
{
    const DirectXWrapper dx_wrapper;

    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");

    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);

    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");

    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");

    constexpr cl_semaphore_payload_khr payload = 1;

    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    clEventWrapper wait_event;
    const int status = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &payload, 0, nullptr, &wait_event);
    test_error(status, "Failed to call clEnqueueWaitSemaphoresKHR");

    // Unblock the wait from the D3D12 command queue.
    log_info("Calling d3d12_command_queue->Signal()\n");
    const HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal(*fence, payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");

    test_error(clFinish(queue), "Could not finish queue");
    test_assert_event_complete(wait_event);

    return TEST_PASS;
}
// Confirm that interlocking waits between OpenCL and DX12 will complete
// successfully: DX waits on a value CL signals, and CL waits on a value DX
// signals, so neither side can make progress alone.
REGISTER_TEST(test_external_semaphores_cl_dx_interlock)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");
    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");
    // DX side is queued first: wait until the fence reaches 1 (signaled by
    // CL below), then signal 2. These calls only enqueue GPU work; they do
    // not block the host.
    log_info("Calling d3d12_command_queue->Wait(1)\n");
    cl_semaphore_payload_khr semaphore_payload = 1;
    HRESULT hr =
        dx_wrapper.getDXCommandQueue()->Wait(*fence, semaphore_payload);
    test_error(FAILED(hr), "Failed to wait on D3D12 fence");
    log_info("Calling d3d12_command_queue->Signal(2)\n");
    hr = dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload + 1);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");
    // CL side: signal 1 (unblocks the DX wait above), then wait for 2
    // (which DX signals once its wait is satisfied).
    log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n");
    clEventWrapper signal_event;
    errcode = clEnqueueSignalSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
    test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR");
    log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n");
    semaphore_payload += 1;
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
    // clFinish only returns once the full interlocked chain has resolved.
    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");
    test_assert_event_complete(wait_event);
    test_assert_event_complete(signal_event);
    return TEST_PASS;
}
// Confirm that multiple waits in OpenCL followed by signals in DX12 and waits
// in DX12 followed by signals in OpenCL complete successfully
REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence_1(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore_1(device, context, dx_wrapper.getDXDevice());
test_error(semaphore_1.createSemaphoreFromFence(*fence_1),
"Could not create semaphore");
const DirectXFenceWrapper fence_2(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore_2(device, context, dx_wrapper.getDXDevice());
test_error(semaphore_2.createSemaphoreFromFence(*fence_2),
"Could not create semaphore");
const cl_semaphore_khr semaphore_list[] = { *semaphore_1, *semaphore_2 };
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
cl_semaphore_payload_khr semaphore_payload_list[] = {
semaphore_payload, semaphore_payload + 1
};
log_info("Calling clEnqueueWaitSemaphoresKHR\n");
clEventWrapper wait_event;
errcode = clEnqueueWaitSemaphoresKHR(queue, 2, semaphore_list,
semaphore_payload_list, 0, nullptr,
&wait_event);
test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
log_info("Calling d3d12_command_queue->Signal()\n");
HRESULT hr =
dx_wrapper.getDXCommandQueue()->Signal(*fence_2, semaphore_payload + 1);
test_error(FAILED(hr), "Failed to signal D3D12 fence 2");
hr = dx_wrapper.getDXCommandQueue()->Signal(*fence_1, semaphore_payload);
test_error(FAILED(hr), "Failed to signal D3D12 fence 1");
log_info("Calling d3d12_command_queue->Wait() with different payloads\n");
hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_1, semaphore_payload + 3);
test_error(FAILED(hr), "Failed to wait on D3D12 fence 1");
hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_2, semaphore_payload + 2);
test_error(FAILED(hr), "Failed to wait on D3D12 fence 2");
errcode = clFinish(queue);
test_error(errcode, "Could not finish queue");
test_assert_event_complete(wait_event);
semaphore_payload_list[0] = semaphore_payload + 3;
semaphore_payload_list[1] = semaphore_payload + 2;
log_info("Calling clEnqueueSignalSemaphoresKHR\n");
clEventWrapper signal_event;
errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list,
semaphore_payload_list, 0, nullptr,
&signal_event);
test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR");
// Wait until the GPU has completed commands up to this fence point.
log_info("Waiting for D3D12 command queue completion\n");
if ((*fence_1)->GetCompletedValue() < semaphore_payload_list[0])
{
const HANDLE event_handle =
CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
hr = (*fence_1)->SetEventOnCompletion(semaphore_payload_list[0],
event_handle);
test_error(FAILED(hr),
"Failed to set D3D12 fence 1 event on completion");
WaitForSingleObject(event_handle, INFINITE);
CloseHandle(event_handle);
}
if ((*fence_2)->GetCompletedValue() < semaphore_payload_list[1])
{
const HANDLE event_handle =
CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
hr = (*fence_2)->SetEventOnCompletion(semaphore_payload_list[1],
event_handle);
test_error(FAILED(hr),
"Failed to set D3D12 fence 2 event on completion");
WaitForSingleObject(event_handle, INFINITE);
CloseHandle(event_handle);
}
errcode = clFinish(queue);
test_error(errcode, "Could not finish queue");
test_assert_event_complete(signal_event);
return TEST_PASS;
}

View File

@@ -0,0 +1,220 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "semaphore_dx_fence_base.h"
// Confirm that a wait followed by a signal in DirectX 12 using an exported
// semaphore will complete successfully: the OpenCL semaphore is exported as
// a D3D12 shared fence handle, opened in D3D12, and signaled from there.
REGISTER_TEST(test_external_semaphores_export_dx_signal)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    GET_PFN(device, clGetSemaphoreInfoKHR);
    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
    // Check that the device can export D3D12 fence handles.
    size_t export_types_size = 0;
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
                        nullptr, &export_types_size);
    test_error(errcode, "Could not query export semaphore handle types");
    std::vector<cl_external_semaphore_handle_type_khr> export_types(
        export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
                        export_types_size, export_types.data(), nullptr);
    test_error(errcode, "Could not query export semaphore handle types");
    if (std::find(export_types.begin(), export_types.end(),
                  CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
        == export_types.end())
    {
        // This path returns TEST_FAIL, so report it as an error rather than
        // an informational message.
        log_error("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
                  "the supported export types\n");
        return TEST_FAIL;
    }
    // Create an exportable binary semaphore.
    constexpr cl_semaphore_properties_khr sem_props[] = {
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
        0
    };
    cl_semaphore_khr semaphore =
        clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
    test_error(errcode, "Could not create semaphore");
    cl_bool is_exportable = CL_FALSE;
    errcode =
        clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
                              sizeof(is_exportable), &is_exportable, nullptr);
    test_error(errcode, "Could not get semaphore info");
    test_error(!is_exportable, "Semaphore is not exportable");
    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper wait_event;
    errcode = clEnqueueWaitSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
    test_error(errcode, "Failed to wait semaphore");
    // Export the semaphore as a D3D12 shared fence handle and open it.
    HANDLE semaphore_handle = nullptr;
    errcode = clGetSemaphoreHandleForTypeKHR(
        semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
        sizeof(semaphore_handle), &semaphore_handle, nullptr);
    test_error(errcode, "Could not get semaphore handle");
    ID3D12Fence *fence = nullptr;
    // OpenSharedHandle returns an HRESULT, not a CL error code, so check it
    // with FAILED() instead of feeding it to test_error directly.
    const HRESULT hr_open = dx_wrapper.getDXDevice()->OpenSharedHandle(
        semaphore_handle, IID_PPV_ARGS(&fence));
    test_error(FAILED(hr_open), "Could not open semaphore handle");
    // Unblock the CL wait from the D3D12 side.
    log_info("Calling fence->Signal()\n");
    const HRESULT hr = fence->Signal(semaphore_payload);
    test_error(FAILED(hr), "Failed to signal D3D12 fence");
    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");
    test_assert_event_complete(wait_event);
    // Release resources
    CloseHandle(semaphore_handle);
    test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
    fence->Release();
    return TEST_PASS;
}
// Confirm that a signal in OpenCL followed by a wait in DirectX 12 using an
// exported semaphore will complete successfully
REGISTER_TEST(test_external_semaphores_export_dx_wait)
{
    int errcode = CL_SUCCESS;
    // D3D12 device + command queue used for the DirectX side of the handoff.
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    GET_PFN(device, clGetSemaphoreInfoKHR);
    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
    // Query the device's exportable semaphore handle types (size first, then
    // the list) and require D3D12 fence export support before continuing.
    size_t export_types_size = 0;
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
                        nullptr, &export_types_size);
    test_error(errcode, "Could not query export semaphore handle types");
    std::vector<cl_external_semaphore_handle_type_khr> export_types(
        export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
    errcode =
        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
                        export_types_size, export_types.data(), nullptr);
    test_error(errcode, "Could not query export semaphore handle types");
    if (std::find(export_types.begin(), export_types.end(),
                  CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
        == export_types.end())
    {
        log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
                 "the supported export types\n");
        return TEST_FAIL;
    }
    // Create a binary OpenCL semaphore declared exportable as a D3D12 fence.
    constexpr cl_semaphore_properties_khr sem_props[] = {
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
        0
    };
    cl_semaphore_khr semaphore =
        clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
    test_error(errcode, "Could not create semaphore");
    // Sanity-check that the implementation reports the semaphore exportable.
    cl_bool is_exportable = CL_FALSE;
    errcode =
        clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
                              sizeof(is_exportable), &is_exportable, nullptr);
    test_error(errcode, "Could not get semaphore info");
    test_error(!is_exportable, "Semaphore is not exportable");
    // Signal payload 1 on the OpenCL queue; DirectX will wait for it below.
    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
    clEventWrapper signal_event;
    errcode = clEnqueueSignalSemaphoresKHR(
        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
    test_error(errcode, "Failed to signal semaphore");
    // Export the semaphore as a shared handle and reopen it as a D3D12 fence.
    HANDLE semaphore_handle = nullptr;
    errcode = clGetSemaphoreHandleForTypeKHR(
        semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
        sizeof(semaphore_handle), &semaphore_handle, nullptr);
    test_error(errcode, "Could not get semaphore handle");
    ID3D12Fence *fence = nullptr;
    errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle,
                                                         IID_PPV_ARGS(&fence));
    test_error(errcode, "Could not open semaphore handle");
    // Queue a GPU-side wait on the fence for the signaled payload value.
    // NOTE(review): the log text names get_d3d12_command_queue(); the call
    // actually made is getDXCommandQueue() — consider aligning the message.
    log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n");
    HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence, semaphore_payload);
    test_error(FAILED(hr), "Failed to wait on D3D12 fence");
    // Block on the host until the fence reaches the payload, if it has not
    // already completed by the time we check.
    log_info("Calling WaitForSingleObject\n");
    if (fence->GetCompletedValue() < semaphore_payload)
    {
        // NOTE(review): CreateEventEx may return null; presumably acceptable
        // for a test, but a null check would avoid waiting on a bad handle.
        const HANDLE event =
            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
        hr = fence->SetEventOnCompletion(semaphore_payload, event);
        test_error(FAILED(hr), "Failed to set event on completion");
        WaitForSingleObject(event, INFINITE);
        CloseHandle(event);
    }
    errcode = clFinish(queue);
    test_error(errcode, "Could not finish queue");
    test_assert_event_complete(signal_event);
    // Release resources
    // NOTE(review): earlier test_error early-returns leak the semaphore, the
    // shared handle, and the fence; RAII wrappers (as used by the other
    // dx_fence tests) would close that gap — confirm intent.
    CloseHandle(semaphore_handle);
    test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
    fence->Release();
    return TEST_PASS;
}

View File

@@ -0,0 +1,89 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "semaphore_dx_fence_base.h"
// Passing a null semaphore payload list to clEnqueueWaitSemaphoresKHR must be
// rejected with CL_INVALID_VALUE.
REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
{
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Resolve the extension entry points exercised by this test.
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    const bool can_import = is_import_handle_available(
        device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR);
    test_error(!can_import,
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");
    // Import a freshly created D3D12 fence into an OpenCL semaphore.
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");
    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
    // Deliberately omit the payload list: the enqueue must fail.
    const int errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore,
                                                   nullptr, 0, nullptr,
                                                   nullptr);
    test_assert_error(
        errcode == CL_INVALID_VALUE,
        "Unexpected error code returned from clEnqueueWaitSemaphores");
    return TEST_PASS;
}
// Confirm that a signal without a semaphore payload list will return
// CL_INVALID_VALUE
REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");
    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");
    // Fix: this test exercises the signal path, so log the API actually
    // called (the original message said clEnqueueWaitSemaphoresKHR,
    // copy-pasted from the negative-wait test).
    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
    // A null payload list must make the signal fail with CL_INVALID_VALUE.
    errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
                                           nullptr, nullptr);
    test_assert_error(
        errcode == CL_INVALID_VALUE,
        "Unexpected error code returned from clEnqueueSignalSemaphores");
    return TEST_PASS;
}

View File

@@ -0,0 +1,69 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "semaphore_dx_fence_base.h"
// Confirm that the CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR property is in the
// properties returned by clGetSemaphoreInfo
REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
{
    int errcode = CL_SUCCESS;
    const DirectXWrapper dx_wrapper;
    REQUIRE_EXTENSION("cl_khr_external_semaphore");
    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);
    GET_PFN(device, clGetSemaphoreInfoKHR);
    test_error(!is_import_handle_available(device,
                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
               "supported import types");
    // Import D3D12 fence into OpenCL
    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
    test_error(semaphore.createSemaphoreFromFence(*fence),
               "Could not create semaphore");
    // Query the semaphore's property list: size first, then the data.
    size_t properties_size_bytes = 0;
    errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR, 0,
                                    nullptr, &properties_size_bytes);
    test_error(errcode, "Could not get semaphore info");
    std::vector<cl_semaphore_properties_khr> semaphore_properties(
        properties_size_bytes / sizeof(cl_semaphore_properties_khr));
    errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
                                    properties_size_bytes,
                                    semaphore_properties.data(), nullptr);
    test_error(errcode, "Could not get semaphore info");
    // Scan consecutive (key, value) pairs for the handle type paired with the
    // fence handle we imported.
    // Fix: use "i + 1 < size()" instead of "i < size() - 1" — with an empty
    // property list the unsigned subtraction underflows to SIZE_MAX and the
    // loop reads out of bounds.
    for (size_t i = 0; i + 1 < semaphore_properties.size(); i++)
    {
        if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR
            && semaphore_properties[i + 1]
                == reinterpret_cast<cl_semaphore_properties_khr>(
                       semaphore.getHandle()))
        {
            return TEST_PASS;
        }
    }
    log_error(
        "Failed to find the dx fence handle type in the semaphore properties");
    return TEST_FAIL;
}

View File

@@ -35,7 +35,7 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
int wgid = get_group_id(0); int wgid = get_group_id(0);
int wgsize = get_local_size(0); int wgsize = get_local_size(0);
if (tid == 0) atomic_store(localPtr, 0); if (tid == 0) atomic_store_explicit(localPtr, 0, memory_order_relaxed, memory_scope_work_group);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
@@ -47,12 +47,12 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
if ((wgid % 2) == 0) if ((wgid % 2) == 0)
ptr = localPtr; ptr = localPtr;
int inc = atomic_fetch_add(ptr, 1); int inc = atomic_fetch_add_explicit(ptr, 1, memory_order_relaxed, memory_scope_work_group);
// In the cases where the local memory ptr was used, // In the cases where the local memory ptr was used,
// save off the final value. // save off the final value.
if ((wgid % 2) == 0 && inc == (wgsize-1)) if ((wgid % 2) == 0 && inc == (wgsize-1))
atomic_store(&globalPtr[wgid], inc); atomic_store_explicit(&globalPtr[wgid], inc, memory_order_relaxed, memory_scope_work_group);
} }
)OpenCLC"; )OpenCLC";
@@ -67,7 +67,7 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
int wgid = get_group_id(0); int wgid = get_group_id(0);
int wgsize = get_local_size(0); int wgsize = get_local_size(0);
if (tid == 0) atomic_store(localPtr, 0); if (tid == 0) atomic_store_explicit(localPtr, 0, memory_order_relaxed, memory_scope_work_group);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
@@ -79,14 +79,17 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
if ((tid % 2) == 0) if ((tid % 2) == 0)
ptr = localPtr; ptr = localPtr;
atomic_fetch_add(ptr, 1); atomic_fetch_add_explicit(ptr, 1, memory_order_relaxed, memory_scope_work_group);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
// In the cases where the local memory ptr was used, // In the cases where the local memory ptr was used,
// save off the final value. // save off the final value.
if (tid == 0) if (tid == 0)
atomic_store(&globalPtr[(wgid * 2) + 1], atomic_load(localPtr)); atomic_store_explicit(&globalPtr[(wgid * 2) + 1],
atomic_load_explicit(localPtr, memory_order_relaxed, memory_scope_work_group),
memory_order_relaxed,
memory_scope_work_group);
} }
)OpenCLC"; )OpenCLC";
} }

View File

@@ -200,7 +200,7 @@ int main(int argc, const char *argv[])
} }
} }
if (argc > 1 && strcmp(argv[1], "-list") == 0) if (gListTests)
{ {
log_info("Available 2.x tests:\n"); log_info("Available 2.x tests:\n");
for (int i = 0; i < test_num; i++) for (int i = 0; i < test_num; i++)

BIN
test_conformance/spir/half.zip Normal file → Executable file

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
// //
// Copyright (c) 2017 The Khronos Group Inc. // Copyright (c) 2017 The Khronos Group Inc.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
@@ -43,35 +43,37 @@
// //
// Task // Task
// //
Task::Task(cl_device_id device, const char* options): Task::Task(cl_device_id device, const char* options): m_devid(device)
m_devid(device) { {
if (options) if (options) m_options = options;
m_options = options;
} }
Task::~Task() {} Task::~Task() {}
const char* Task::getErrorLog() const { const char* Task::getErrorLog() const { return m_log.c_str(); }
return m_log.c_str();
}
void Task::setErrorLog(cl_program prog) { void Task::setErrorLog(cl_program prog)
{
size_t len = 0; size_t len = 0;
std::vector<char> log; std::vector<char> log;
cl_int err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, 0, NULL, &len); cl_int err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG,
if(err_code != CL_SUCCESS) 0, NULL, &len);
if (err_code != CL_SUCCESS)
{ {
m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &len) failed.\n"; m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &len) "
"failed.\n";
return; return;
} }
log.resize(len, 0); log.resize(len, 0);
err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, len, &log[0], NULL); err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, len,
if(err_code != CL_SUCCESS) &log[0], NULL);
if (err_code != CL_SUCCESS)
{ {
m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &log) failed.\n"; m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &log) "
"failed.\n";
return; return;
} }
m_log.append(&log[0]); m_log.append(&log[0]);
@@ -84,10 +86,11 @@ BuildTask::BuildTask(cl_program prog, cl_device_id dev, const char* options)
: Task(dev, options), m_program(prog) : Task(dev, options), m_program(prog)
{} {}
bool BuildTask::execute() { bool BuildTask::execute()
cl_int err_code = clBuildProgram(m_program, 0, NULL, m_options.c_str(), NULL, NULL); {
if(CL_SUCCESS == err_code) cl_int err_code =
return true; clBuildProgram(m_program, 0, NULL, m_options.c_str(), NULL, NULL);
if (CL_SUCCESS == err_code) return true;
setErrorLog(m_program); setErrorLog(m_program);
return false; return false;
@@ -96,8 +99,10 @@ bool BuildTask::execute() {
// //
// SpirBuildTask // SpirBuildTask
// //
SpirBuildTask::SpirBuildTask(cl_program prog, cl_device_id dev, const char* options) : SpirBuildTask::SpirBuildTask(cl_program prog, cl_device_id dev,
BuildTask(prog, dev, options) {} const char* options)
: BuildTask(prog, dev, options)
{}
// //
// CompileTask // CompileTask
@@ -107,47 +112,43 @@ CompileTask::CompileTask(cl_program prog, cl_device_id dev, const char* options)
: Task(dev, options), m_program(prog) : Task(dev, options), m_program(prog)
{} {}
void CompileTask::addHeader(const char* hname, cl_program hprog) { void CompileTask::addHeader(const char* hname, cl_program hprog)
{
m_headers.push_back(std::make_pair(hname, hprog)); m_headers.push_back(std::make_pair(hname, hprog));
} }
const char* first(std::pair<const char*,cl_program>& p) { const char* first(std::pair<const char*, cl_program>& p) { return p.first; }
return p.first;
}
cl_program second(const std::pair<const char*, cl_program>& p) { cl_program second(const std::pair<const char*, cl_program>& p)
{
return p.second; return p.second;
} }
bool CompileTask::execute() { bool CompileTask::execute()
{
// Generating the header names vector. // Generating the header names vector.
std::vector<const char*> names; std::vector<const char*> names;
std::transform(m_headers.begin(), m_headers.end(), names.begin(), first); std::transform(m_headers.begin(), m_headers.end(), names.begin(), first);
// Generating the header programs vector. // Generating the header programs vector.
std::vector<cl_program> programs; std::vector<cl_program> programs;
std::transform(m_headers.begin(), m_headers.end(), programs.begin(), second); std::transform(m_headers.begin(), m_headers.end(), programs.begin(),
second);
const char** h_names = NULL; const char** h_names = NULL;
const cl_program* h_programs = NULL; const cl_program* h_programs = NULL;
if (!m_headers.empty()) if (!m_headers.empty())
{ {
h_programs = &programs[0]; h_programs = &programs[0];
h_names = &names[0]; h_names = &names[0];
} }
// Compiling with the headers. // Compiling with the headers.
cl_int err_code = clCompileProgram( cl_int err_code =
m_program, clCompileProgram(m_program, 1U, &m_devid, m_options.c_str(),
1U, m_headers.size(), // # of headers
&m_devid, h_programs, h_names, NULL, NULL);
m_options.c_str(), if (CL_SUCCESS == err_code) return true;
m_headers.size(), // # of headers
h_programs,
h_names,
NULL, NULL);
if (CL_SUCCESS == err_code)
return true;
setErrorLog(m_program); setErrorLog(m_program);
return false; return false;
@@ -156,8 +157,10 @@ bool CompileTask::execute() {
// //
// SpirCompileTask // SpirCompileTask
// //
SpirCompileTask::SpirCompileTask(cl_program prog, cl_device_id dev, const char* options) : SpirCompileTask::SpirCompileTask(cl_program prog, cl_device_id dev,
CompileTask(prog, dev, options) {} const char* options)
: CompileTask(prog, dev, options)
{}
// //
@@ -169,13 +172,16 @@ LinkTask::LinkTask(cl_program* programs, int num_programs, cl_context ctxt,
m_numPrograms(num_programs), m_context(ctxt) m_numPrograms(num_programs), m_context(ctxt)
{} {}
bool LinkTask::execute() { bool LinkTask::execute()
{
cl_int err_code; cl_int err_code;
int i; int i;
for(i = 0; i < m_numPrograms; ++i) for (i = 0; i < m_numPrograms; ++i)
{ {
err_code = clCompileProgram(m_programs[i], 1, &m_devid, "-x spir -spir-std=1.2 -cl-kernel-arg-info", 0, NULL, NULL, NULL, NULL); err_code = clCompileProgram(m_programs[i], 1, &m_devid,
"-x spir -spir-std=1.2 -cl-kernel-arg-info",
0, NULL, NULL, NULL, NULL);
if (CL_SUCCESS != err_code) if (CL_SUCCESS != err_code)
{ {
setErrorLog(m_programs[i]); setErrorLog(m_programs[i]);
@@ -183,91 +189,78 @@ bool LinkTask::execute() {
} }
} }
m_executable = clLinkProgram(m_context, 1, &m_devid, m_options.c_str(), m_numPrograms, m_programs, NULL, NULL, &err_code); m_executable =
if (CL_SUCCESS == err_code) clLinkProgram(m_context, 1, &m_devid, m_options.c_str(), m_numPrograms,
return true; m_programs, NULL, NULL, &err_code);
if (CL_SUCCESS == err_code) return true;
if(m_executable) setErrorLog(m_executable); if (m_executable) setErrorLog(m_executable);
return false; return false;
} }
cl_program LinkTask::getExecutable() const { cl_program LinkTask::getExecutable() const { return m_executable; }
return m_executable;
}
LinkTask::~LinkTask() { LinkTask::~LinkTask()
if(m_executable) clReleaseProgram(m_executable); {
if (m_executable) clReleaseProgram(m_executable);
} }
// //
// KernelEnumerator // KernelEnumerator
// //
void KernelEnumerator::process(cl_program prog) { void KernelEnumerator::process(cl_program prog)
{
const size_t MAX_KERNEL_NAME = 64; const size_t MAX_KERNEL_NAME = 64;
size_t num_kernels; size_t num_kernels;
cl_int err_code = clGetProgramInfo( cl_int err_code = clGetProgramInfo(prog, CL_PROGRAM_NUM_KERNELS,
prog, sizeof(size_t), &num_kernels, NULL);
CL_PROGRAM_NUM_KERNELS, if (CL_SUCCESS != err_code) return;
sizeof(size_t),
&num_kernels,
NULL
);
if (CL_SUCCESS != err_code)
return;
// Querying for the number of kernels. // Querying for the number of kernels.
size_t buffer_len = sizeof(char)*num_kernels*MAX_KERNEL_NAME; size_t buffer_len = sizeof(char) * num_kernels * MAX_KERNEL_NAME;
char* kernel_names = new char[buffer_len]; char* kernel_names = new char[buffer_len];
memset(kernel_names, '\0', buffer_len); memset(kernel_names, '\0', buffer_len);
size_t str_len = 0; size_t str_len = 0;
err_code = clGetProgramInfo( err_code = clGetProgramInfo(prog, CL_PROGRAM_KERNEL_NAMES, buffer_len,
prog, (void*)kernel_names, &str_len);
CL_PROGRAM_KERNEL_NAMES, if (CL_SUCCESS != err_code) return;
buffer_len,
(void *)kernel_names,
&str_len
);
if (CL_SUCCESS != err_code)
return;
//parsing the names and inserting them to the list // parsing the names and inserting them to the list
std::string names(kernel_names); std::string names(kernel_names);
assert (str_len == 1+names.size() && "incompatible string lengths"); assert(str_len == 1 + names.size() && "incompatible string lengths");
size_t offset = 0; size_t offset = 0;
for(size_t i=0 ; i<names.size() ; ++i){ for (size_t i = 0; i < names.size(); ++i)
//kernel names are separated by semi colons {
if (names[i] == ';'){ // kernel names are separated by semi colons
m_kernels.push_back(names.substr(offset, i-offset)); if (names[i] == ';')
offset = i+1; {
m_kernels.push_back(names.substr(offset, i - offset));
offset = i + 1;
} }
} }
m_kernels.push_back(names.substr(offset, names.size()-offset)); m_kernels.push_back(names.substr(offset, names.size() - offset));
delete[] kernel_names; delete[] kernel_names;
} }
KernelEnumerator::KernelEnumerator(cl_program prog) { KernelEnumerator::KernelEnumerator(cl_program prog) { process(prog); }
process(prog);
}
KernelEnumerator::iterator KernelEnumerator::begin(){ KernelEnumerator::iterator KernelEnumerator::begin()
{
return m_kernels.begin(); return m_kernels.begin();
} }
KernelEnumerator::iterator KernelEnumerator::end(){ KernelEnumerator::iterator KernelEnumerator::end() { return m_kernels.end(); }
return m_kernels.end();
}
size_t KernelEnumerator::size() const { size_t KernelEnumerator::size() const { return m_kernels.size(); }
return m_kernels.size();
}
/** /**
Run the single test - run the test for both CL and SPIR versions of the kernel Run the single test - run the test for both CL and SPIR versions of the kernel
*/ */
static bool run_test(cl_context context, cl_command_queue queue, cl_program clprog, static bool run_test(cl_context context, cl_command_queue queue,
cl_program bcprog, const std::string& kernel_name, std::string& err, const cl_device_id device, cl_program clprog, cl_program bcprog,
float ulps) const std::string& kernel_name, std::string& err,
const cl_device_id device, float ulps)
{ {
WorkSizeInfo ws; WorkSizeInfo ws;
TestResult cl_result; TestResult cl_result;
@@ -276,28 +269,29 @@ static bool run_test(cl_context context, cl_command_queue queue, cl_program clpr
{ {
// make sure that the kernel will be released before the program // make sure that the kernel will be released before the program
clKernelWrapper kernel = create_kernel_helper(clprog, kernel_name); clKernelWrapper kernel = create_kernel_helper(clprog, kernel_name);
// based on the kernel characteristics, we are generating and initializing the arguments for both phases (cl and bc executions) // based on the kernel characteristics, we are generating and
// initializing the arguments for both phases (cl and bc executions)
generate_kernel_data(context, kernel, ws, cl_result); generate_kernel_data(context, kernel, ws, cl_result);
bc_result.reset(cl_result.clone(context, ws, kernel, device)); bc_result.reset(cl_result.clone(context, ws, kernel, device));
assert (compare_results(cl_result, *bc_result, ulps) && "not equal?"); assert(compare_results(cl_result, *bc_result, ulps) && "not equal?");
run_kernel( kernel, queue, ws, cl_result ); run_kernel(kernel, queue, ws, cl_result);
} }
// now, run the single BC test // now, run the single BC test
{ {
// make sure that the kernel will be released before the program // make sure that the kernel will be released before the program
clKernelWrapper kernel = create_kernel_helper(bcprog, kernel_name); clKernelWrapper kernel = create_kernel_helper(bcprog, kernel_name);
run_kernel( kernel, queue, ws, *bc_result ); run_kernel(kernel, queue, ws, *bc_result);
} }
int error = clFinish(queue); int error = clFinish(queue);
if( CL_SUCCESS != error) if (CL_SUCCESS != error)
{ {
err = "clFinish failed\n"; err = "clFinish failed\n";
return false; return false;
} }
// compare the results // compare the results
if( !compare_results(cl_result, *bc_result, ulps) ) if (!compare_results(cl_result, *bc_result, ulps))
{ {
err = " (result diff in kernel '" + kernel_name + "')."; err = " (result diff in kernel '" + kernel_name + "').";
return false; return false;
@@ -308,16 +302,16 @@ static bool run_test(cl_context context, cl_command_queue queue, cl_program clpr
/** /**
Get the maximum relative error defined as ULP of floating-point math functions Get the maximum relative error defined as ULP of floating-point math functions
*/ */
static float get_max_ulps(const char *test_name) static float get_max_ulps(const char* test_name)
{ {
float ulps = 0.f; float ulps = 0.f;
// Get ULP values from math_brute_force functionList // Get ULP values from math_brute_force functionList
if (strstr(test_name, "math_kernel")) if (strstr(test_name, "math_kernel"))
{ {
for( size_t i = 0; i < functionListCount; i++ ) for (size_t i = 0; i < functionListCount; i++)
{ {
char name[64]; char name[64];
const Func *func = &functionList[ i ]; const Func* func = &functionList[i];
sprintf(name, ".%s_float", func->name); sprintf(name, ".%s_float", func->name);
if (strstr(test_name, name)) if (strstr(test_name, name))
{ {
@@ -336,16 +330,17 @@ static float get_max_ulps(const char *test_name)
return ulps; return ulps;
} }
TestRunner::TestRunner(EventHandler *success, EventHandler *failure, TestRunner::TestRunner(EventHandler* success, EventHandler* failure,
const OclExtensions& devExt): const OclExtensions& devExt)
m_successHandler(success), m_failureHandler(failure), m_devExt(&devExt) {} : m_successHandler(success), m_failureHandler(failure), m_devExt(&devExt)
{}
/** /**
Based on the test name build the cl file name, the bc file name and execute Based on the test name build the cl file name, the bc file name and execute
the kernel for both modes (cl and bc). the kernel for both modes (cl and bc).
*/ */
bool TestRunner::runBuildTest(cl_device_id device, const char *folder, bool TestRunner::runBuildTest(cl_device_id device, const char* folder,
const char *test_name, cl_uint size_t_width) const char* test_name, cl_uint size_t_width)
{ {
int failures = 0; int failures = 0;
// Composing the name of the CSV file. // Composing the name of the CSV file.
@@ -365,28 +360,35 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
cl_bool images3D = khrDb.isImages3DRequired(folder, test_name); cl_bool images3D = khrDb.isImages3DRequired(folder, test_name);
char deviceProfile[64]; char deviceProfile[64];
clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(deviceProfile), &deviceProfile, NULL); clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(deviceProfile),
&deviceProfile, NULL);
std::string device_profile(deviceProfile, 64); std::string device_profile(deviceProfile, 64);
if(images == CL_TRUE && checkForImageSupport(device) != 0) if (images == CL_TRUE && checkForImageSupport(device) != 0)
{ {
(*m_successHandler)(test_name, ""); (*m_successHandler)(test_name, "");
std::cout << "Skipped. (Cannot run on device due to Images is not supported)." << std::endl; std::cout
<< "Skipped. (Cannot run on device due to Images is not supported)."
<< std::endl;
return true; return true;
} }
if(images3D == CL_TRUE && checkFor3DImageSupport(device) != 0) if (images3D == CL_TRUE && checkFor3DImageSupport(device) != 0)
{ {
(*m_successHandler)(test_name, ""); (*m_successHandler)(test_name, "");
std::cout << "Skipped. (Cannot run on device as 3D images are not supported)." << std::endl; std::cout
<< "Skipped. (Cannot run on device as 3D images are not supported)."
<< std::endl;
return true; return true;
} }
OclExtensions requiredExt = khrDb.getRequiredExtensions(folder, test_name); OclExtensions requiredExt = khrDb.getRequiredExtensions(folder, test_name);
if(!m_devExt->supports(requiredExt)) if (!m_devExt->supports(requiredExt))
{ {
(*m_successHandler)(test_name, ""); (*m_successHandler)(test_name, "");
std::cout << "Skipped. (Cannot run on device due to missing extensions: " << m_devExt->get_missing(requiredExt) << " )." << std::endl; std::cout
<< "Skipped. (Cannot run on device due to missing extensions: "
<< m_devExt->get_missing(requiredExt) << " )." << std::endl;
return true; return true;
} }
@@ -409,17 +411,26 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
cl_device_fp_config gFloatCapabilities = 0; cl_device_fp_config gFloatCapabilities = 0;
cl_int err; cl_int err;
if ((err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(gFloatCapabilities), &gFloatCapabilities, NULL))) if ((err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
sizeof(gFloatCapabilities), &gFloatCapabilities,
NULL)))
{ {
log_info("Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n", err); log_info("Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
err);
} }
if (strstr(test_name, "div_cr") || strstr(test_name, "sqrt_cr")) { if (strstr(test_name, "div_cr") || strstr(test_name, "sqrt_cr"))
if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0) { {
if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
{
(*m_successHandler)(test_name, ""); (*m_successHandler)(test_name, "");
std::cout << "Skipped. (Cannot run on device due to missing CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT property.)" << std::endl; std::cout << "Skipped. (Cannot run on device due to missing "
"CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT property.)"
<< std::endl;
return true; return true;
} else { }
else
{
bcoptions += " -cl-fp32-correctly-rounded-divide-sqrt"; bcoptions += " -cl-fp32-correctly-rounded-divide-sqrt";
cloptions += " -cl-fp32-correctly-rounded-divide-sqrt"; cloptions += " -cl-fp32-correctly-rounded-divide-sqrt";
} }
@@ -427,33 +438,39 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
// Building the programs. // Building the programs.
BuildTask clBuild(clprog, device, cloptions.c_str()); BuildTask clBuild(clprog, device, cloptions.c_str());
if (!clBuild.execute()) { if (!clBuild.execute())
{
std::cerr << clBuild.getErrorLog() << std::endl; std::cerr << clBuild.getErrorLog() << std::endl;
(*m_failureHandler)(test_name, "");
return false; return false;
} }
SpirBuildTask bcBuild(bcprog, device, bcoptions.c_str()); SpirBuildTask bcBuild(bcprog, device, bcoptions.c_str());
if (!bcBuild.execute()) { if (!bcBuild.execute())
{
std::cerr << bcBuild.getErrorLog() << std::endl; std::cerr << bcBuild.getErrorLog() << std::endl;
(*m_failureHandler)(test_name, "");
return false; return false;
} }
KernelEnumerator clkernel_enumerator(clprog), KernelEnumerator clkernel_enumerator(clprog), bckernel_enumerator(bcprog);
bckernel_enumerator(bcprog); if (clkernel_enumerator.size() != bckernel_enumerator.size())
if (clkernel_enumerator.size() != bckernel_enumerator.size()) { {
std::cerr << "number of kernels in test" << test_name std::cerr << "number of kernels in test" << test_name
<< " doesn't match in bc and cl files" << std::endl; << " doesn't match in bc and cl files" << std::endl;
(*m_failureHandler)(test_name, "");
return false; return false;
} }
KernelEnumerator::iterator it = clkernel_enumerator.begin(), KernelEnumerator::iterator it = clkernel_enumerator.begin(),
e = clkernel_enumerator.end(); e = clkernel_enumerator.end();
while (it != e) while (it != e)
{ {
std::string kernel_name = *it++; std::string kernel_name = *it++;
std::string err; std::string err;
try try
{ {
bool success = run_test(context, queue, clprog, bcprog, kernel_name, err, device, ulps); bool success = run_test(context, queue, clprog, bcprog, kernel_name,
err, device, ulps);
if (success) if (success)
{ {
log_info("kernel '%s' passed.\n", kernel_name.c_str()); log_info("kernel '%s' passed.\n", kernel_name.c_str());
@@ -468,7 +485,8 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
} catch (const std::runtime_error& err) } catch (const std::runtime_error& err)
{ {
++failures; ++failures;
log_info("kernel '%s' failed: %s\n", kernel_name.c_str(), err.what()); log_info("kernel '%s' failed: %s\n", kernel_name.c_str(),
err.what());
(*m_failureHandler)(test_name, kernel_name); (*m_failureHandler)(test_name, kernel_name);
} }
} }
@@ -476,4 +494,3 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
log_info("%s %s\n", test_name, failures ? "FAILED" : "passed."); log_info("%s %s\n", test_name, failures ? "FAILED" : "passed.");
return failures == 0; return failures == 0;
} }

View File

@@ -17,7 +17,7 @@ else()
add_custom_command( add_custom_command(
OUTPUT ${VULKAN_TEST_RESOURCES}/buffer.spv OUTPUT ${VULKAN_TEST_RESOURCES}/buffer.spv
COMMAND ${Vulkan_glslang_binary} COMMAND ${Vulkan_glslang_binary}
--target-env vulkan1.0 --target-env vulkan1.1
-o ${VULKAN_TEST_RESOURCES}/buffer.spv -o ${VULKAN_TEST_RESOURCES}/buffer.spv
${CMAKE_CURRENT_SOURCE_DIR}/buffer.comp ${CMAKE_CURRENT_SOURCE_DIR}/buffer.comp
DEPENDS buffer.comp DEPENDS buffer.comp
@@ -35,7 +35,7 @@ else()
add_custom_command( add_custom_command(
OUTPUT ${VULKAN_TEST_RESOURCES}/image2D_${GLSL_FORMAT}.spv OUTPUT ${VULKAN_TEST_RESOURCES}/image2D_${GLSL_FORMAT}.spv
COMMAND ${Vulkan_glslang_binary} COMMAND ${Vulkan_glslang_binary}
--target-env vulkan1.0 --target-env vulkan1.1
-o ${VULKAN_TEST_RESOURCES}/image2D_${GLSL_FORMAT}.spv -o ${VULKAN_TEST_RESOURCES}/image2D_${GLSL_FORMAT}.spv
${CMAKE_CURRENT_BINARY_DIR}/image2D_${GLSL_FORMAT}.comp ${CMAKE_CURRENT_BINARY_DIR}/image2D_${GLSL_FORMAT}.comp
DEPENDS image2D_${GLSL_FORMAT}.comp DEPENDS image2D_${GLSL_FORMAT}.comp