Merge branch 'main' into cl_khr_unified_svm

This commit is contained in:
Ben Ashbaugh
2026-03-25 12:05:00 -07:00
90 changed files with 5595 additions and 1710 deletions

View File

@@ -28,7 +28,7 @@ jobs:
arch: android-aarch64
android_arch_abi: arm64-v8a
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- name: Setup Ninja
uses: seanmiddleditch/gha-setup-ninja@master
- name: Install Arm and AArch64 compilers
@@ -184,7 +184,7 @@ jobs:
steps:
- name: Install packages
run: sudo apt install -y clang-format clang-format-14
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Check code format

View File

@@ -81,14 +81,17 @@ include(CheckFunctionExists)
include(CheckIncludeFiles)
include(CheckCXXCompilerFlag)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(CLConform_TARGET_ARCH ARM)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" OR
CMAKE_VS_PLATFORM_NAME MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
set(CLConform_TARGET_ARCH ARM64)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(CLConform_TARGET_ARCH ARM)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(CLConform_TARGET_ARCH x86_64)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*")
set(CLConform_TARGET_ARCH x86)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv.*")
set(CLConform_TARGET_ARCH RISCV)
endif()
if(NOT DEFINED CLConform_TARGET_ARCH)
@@ -139,8 +142,12 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
add_cxx_flag_if_supported(-frounding-math)
endif()
else()
# Curb the inclusion of SSE headers when compiling for non x86 targets
if(${CLConform_TARGET_ARCH} STREQUAL "x86_64" OR ${CLConform_TARGET_ARCH}
STREQUAL "x86")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
endif()
endif()
# Set a module's COMPILE_FLAGS if using gcc or clang.

View File

@@ -50,7 +50,7 @@ std::string get_device_info_string(cl_device_id device,
}
/* Determines if an extension is supported by a device. */
int is_extension_available(cl_device_id device, const char *extensionName)
bool is_extension_available(cl_device_id device, const char *extensionName)
{
std::string extString = get_device_extensions_string(device);
std::istringstream ss(extString);

View File

@@ -26,7 +26,7 @@ std::string get_device_info_string(cl_device_id device,
cl_device_info param_name);
/* Determines if an extension is supported by a device. */
int is_extension_available(cl_device_id device, const char *extensionName);
bool is_extension_available(cl_device_id device, const char *extensionName);
/* Returns the version of the extension the device supports or throws an
* exception if the extension is not supported by the device. */

View File

@@ -120,6 +120,12 @@ static int vlog_win32(const char *format, ...);
return retValue; \
} \
}
#define test_object_failure_ret(object, errCode, expectedErrCode, msg, \
retValue) \
{ \
test_assert_error_ret(object == nullptr, msg, retValue); \
test_failure_error_ret(errCode, expectedErrCode, msg, retValue); \
}
#define print_failure_error(errCode, expectedErrCode, msg) \
log_error("ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, \
IGetErrorString(errCode), IGetErrorString(expectedErrCode), \

View File

@@ -32,5 +32,15 @@
} \
} while (false)
#define GET_FUNCTION_EXTENSION_ADDRESS(device, FUNC) \
FUNC = \
reinterpret_cast<FUNC##_fn>(clGetExtensionFunctionAddressForPlatform( \
getPlatformFromDevice(device), #FUNC)); \
if (FUNC == nullptr) \
{ \
log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \
" with " #FUNC "\n"); \
return TEST_FAIL; \
}
#endif // _extensionHelpers_h

View File

@@ -45,6 +45,9 @@ typedef int64_t FPU_mode_type;
#elif defined(__PPC__)
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#elif defined(__riscv)
#define _FPU_MASK_NI 1
static FPU_mode_type fpu_control;
#elif defined(__mips__)
#include "mips/m32c1.h"
#endif
@@ -56,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *oldMode)
|| defined(_M_X64) || defined(__MINGW32__)
*oldMode = _mm_getcsr();
_mm_setcsr(*oldMode | 0x8040);
#elif defined(__PPC__)
#elif defined(__PPC__) || defined(__riscv)
*oldMode = fpu_control;
fpu_control |= _FPU_MASK_NI;
#elif defined(__arm__)
@@ -89,8 +92,8 @@ inline void DisableFTZ(FPU_mode_type *oldMode)
|| defined(_M_X64) || defined(__MINGW32__)
*oldMode = _mm_getcsr();
_mm_setcsr(*oldMode & ~0x8040);
#elif defined(__PPC__)
*mode = fpu_control;
#elif defined(__PPC__) || defined(__riscv)
*oldMode = fpu_control;
fpu_control &= ~_FPU_MASK_NI;
#elif defined(__arm__)
unsigned fpscr;
@@ -121,7 +124,7 @@ inline void RestoreFPState(FPU_mode_type *mode)
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|| defined(_M_X64) || defined(__MINGW32__)
_mm_setcsr(*mode);
#elif defined(__PPC__)
#elif defined(__PPC__) || defined(__riscv)
fpu_control = *mode;
#elif defined(__arm__)
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));

View File

@@ -141,7 +141,6 @@ std::string get_kernel_name(const std::string &source)
{
kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
kernelsList[kernelsList.size() - 1] = '.';
kernelsList[kernelsList.size() - 1] = '.';
}
oss << kernelsList;
}
@@ -678,17 +677,18 @@ static int create_single_kernel_helper_create_program_offline(
return CL_SUCCESS;
}
static int create_single_kernel_helper_create_program(
cl_context context, cl_device_id device, cl_program *outProgram,
unsigned int numKernelLines, const char **kernelProgram,
const char *buildOptions, CompilationMode compilationMode)
int create_single_kernel_helper_create_program(cl_context context,
cl_program *outProgram,
unsigned int numKernelLines,
const char **kernelProgram,
const char *buildOptions)
{
std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
std::string filePrefix =
get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
bool shouldSaveToDisk = should_save_kernel_source_to_disk(
compilationMode, gCompilationCacheMode, gCompilationCachePath,
gCompilationMode, gCompilationCacheMode, gCompilationCachePath,
filePrefix);
if (shouldSaveToDisk)
@@ -701,7 +701,7 @@ static int create_single_kernel_helper_create_program(
return -1;
}
}
if (compilationMode == kOnline)
if (gCompilationMode == kOnline)
{
int error = CL_SUCCESS;
@@ -718,40 +718,9 @@ static int create_single_kernel_helper_create_program(
else
{
return create_single_kernel_helper_create_program_offline(
context, device, outProgram, numKernelLines, kernelProgram,
buildOptions, compilationMode);
}
}
int create_single_kernel_helper_create_program(cl_context context,
cl_program *outProgram,
unsigned int numKernelLines,
const char **kernelProgram,
const char *buildOptions)
{
return create_single_kernel_helper_create_program(
context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
gCompilationMode);
}
int create_single_kernel_helper_create_program_for_device(
cl_context context, cl_device_id device, cl_program *outProgram,
unsigned int numKernelLines, const char **kernelProgram,
const char *buildOptions)
{
return create_single_kernel_helper_create_program(
context, device, outProgram, numKernelLines, kernelProgram,
context, nullptr, outProgram, numKernelLines, kernelProgram,
buildOptions, gCompilationMode);
}
int create_single_kernel_helper_with_build_options(
cl_context context, cl_program *outProgram, cl_kernel *outKernel,
unsigned int numKernelLines, const char **kernelProgram,
const char *kernelName, const char *buildOptions)
{
return create_single_kernel_helper(context, outProgram, outKernel,
numKernelLines, kernelProgram,
kernelName, buildOptions);
}
}
// Creates and builds OpenCL C/C++ program, and creates a kernel
@@ -1239,8 +1208,8 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags,
{
cl_image_format *list;
cl_uint count = 0;
cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
NULL, &count);
cl_int err =
clGetSupportedImageFormats(context, flags, image_type, 0, NULL, &count);
if (count == 0) return 0;
list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
@@ -1276,7 +1245,6 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags,
return (i < count) ? 1 : 0;
}
size_t get_pixel_bytes(const cl_image_format *fmt);
size_t get_pixel_bytes(const cl_image_format *fmt)
{
size_t chanCount;

View File

@@ -58,6 +58,13 @@ void helpInfo()
with a very small subset of the tests. This option should not be used
for conformance submission (default: disabled).
--invalid-object-scenarios=<option_1>,<option_2>....
Specify different scenarios to use when
testing for object validity. Options can be:
nullptr To use a nullptr (default)
valid_object_wrong_type To use a valid_object which is not the correct type
NOTE: valid_object_wrong_type option is not required for OpenCL conformance.
For offline compilation (binary and spir-v modes) only:
--compilation-cache-mode <cache-mode>
Specify a compilation caching mode:
@@ -104,6 +111,7 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
}
delArg = 0;
size_t i_object_length = strlen("--invalid-object-scenarios=");
if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
{
@@ -264,6 +272,32 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
return -1;
}
}
else if (!strncmp(argv[i],
"--invalid-object-scenarios=", i_object_length))
{
if (strlen(argv[i]) > i_object_length)
{
delArg++;
gInvalidObject = 0;
std::string invalid_objects(argv[i]);
if (invalid_objects.find("nullptr") != std::string::npos)
{
gInvalidObject |= InvalidObject::Nullptr;
}
if (invalid_objects.find("valid_object_wrong_type")
!= std::string::npos)
{
gInvalidObject |= InvalidObject::ValidObjectWrongType;
}
}
else
{
log_error("Program argument for --invalid-object-scenarios was "
"not specified.\n");
return -1;
}
}
// cleaning parameters from argv tab
for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg];

View File

@@ -201,6 +201,7 @@ RoundingMode get_round(void)
#elif defined(__mips__)
#include "mips/m32c1.h"
#endif
void *FlushToZero(void)
{
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
@@ -231,6 +232,8 @@ void *FlushToZero(void)
#elif defined(__mips__)
fpa_bissr(FPA_CSR_FS);
return NULL;
#elif defined(__riscv)
return NULL;
#else
#error Unknown arch
#endif
@@ -266,6 +269,8 @@ void UnFlushToZero(void *p)
_FPU_SETCW(flags);
#elif defined(__mips__)
fpa_bicsr(FPA_CSR_FS);
#elif defined(__riscv)
return;
#else
#error Unknown arch
#endif

View File

@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "testHarness.h"
#include "stringHelpers.h"
#include "compat.h"
#include <algorithm>
#include <stdio.h>
@@ -21,6 +22,7 @@
#include <string.h>
#include <cassert>
#include <deque>
#include <filesystem>
#include <mutex>
#include <set>
#include <stdexcept>
@@ -33,6 +35,8 @@
#include "imageHelpers.h"
#include "parseParameters.h"
namespace fs = std::filesystem;
#if !defined(_WIN32)
#include <sys/utsname.h>
#include <unistd.h>
@@ -60,6 +64,7 @@ int gInfNanSupport = 1;
int gIsEmbedded = 0;
int gHasLong = 1;
bool gCoreILProgram = true;
int gInvalidObject = InvalidObject::Nullptr;
#define DEFAULT_NUM_ELEMENTS 0x4000
@@ -94,11 +99,25 @@ static int saveResultsToJson(const char *suiteName, test_definition testList[],
return EXIT_SUCCESS;
}
FILE *file = fopen(fileName, "w");
fs::path file_path(fileName);
// When running under Bazel test, prepend the Bazel output directory to
// the provided path
if (nullptr != getenv("BAZEL_TEST"))
{
char *bazel_output_dir = getenv("TEST_UNDECLARED_OUTPUTS_DIR");
if (nullptr != bazel_output_dir)
{
file_path = fs::path(bazel_output_dir) / file_path;
}
}
auto file_path_str = to_string(file_path.u8string());
FILE *file = fopen(file_path_str.c_str(), "w");
if (NULL == file)
{
log_error("ERROR: Failed to open '%s' for writing results.\n",
fileName);
file_path_str.c_str());
return EXIT_FAILURE;
}
@@ -127,7 +146,8 @@ static int saveResultsToJson(const char *suiteName, test_definition testList[],
int ret = fclose(file) ? EXIT_FAILURE : EXIT_SUCCESS;
log_info("Saving results to %s: %s!\n", fileName, save_map[ret]);
log_info("Saving results to %s: %s!\n", file_path_str.c_str(),
save_map[ret]);
return ret;
}
@@ -308,6 +328,8 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
"CL_CONFORMANCE_RESULTS_FILENAME (currently '%s')\n",
fileName != NULL ? fileName : "<undefined>");
log_info("\t to save results to JSON file.\n");
log_info("\t When running in Bazel test this is relative to "
"$TEST_UNDECLARED_OUTPUTS_DIR.\n");
log_info("\n");
log_info("Test names:\n");
@@ -1408,6 +1430,8 @@ void PrintArch(void)
vlog("ARCH:\tWindows\n");
#elif defined(__mips__)
vlog("ARCH:\tmips\n");
#elif defined(__riscv)
vlog("ARCH:\tRISC-V\n");
#else
#error unknown arch
#endif

View File

@@ -22,6 +22,7 @@
#include <string>
#include <vector>
#include <type_traits>
class Version {
public:
@@ -257,6 +258,37 @@ extern std::string get_platform_info_string(cl_platform_id platform,
cl_platform_info param_name);
extern bool is_platform_extension_available(cl_platform_id platform,
const char *extensionName);
enum InvalidObject
{
Nullptr = 1 << 0,
ValidObjectWrongType = 1 << 1,
};
extern int gInvalidObject;
template <typename T> std::vector<T> get_invalid_objects(cl_device_id device)
{
std::vector<T> ret;
if ((gInvalidObject & InvalidObject::Nullptr)
&& !(std::is_same<T, cl_platform_id>::value))
{
ret.push_back(nullptr);
}
if (gInvalidObject & InvalidObject::ValidObjectWrongType)
{
if (std::is_same<T, cl_device_id>::value)
{
cl_platform_id platform = getPlatformFromDevice(device);
ret.push_back(reinterpret_cast<T>(platform));
}
else
{
ret.push_back(reinterpret_cast<T>(device));
}
}
return ret;
}
#if !defined(__APPLE__)
void memset_pattern4(void *, const void *, size_t);

View File

@@ -6,7 +6,10 @@ set(${MODULE_NAME}_SOURCES
main.cpp
negative_platform.cpp
negative_queue.cpp
negative_context.cpp
negative_enqueue_marker.cpp
negative_enqueue_map_image.cpp
negative_device.cpp
test_api_consistency.cpp
test_bool.cpp
test_retain.cpp

View File

@@ -0,0 +1,290 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
/* Negative Tests for clCreateContext */
REGISTER_TEST(negative_create_context)
{
    // Scenario: CL_CONTEXT_PLATFORM paired with a nullptr platform value
    // must be rejected with CL_INVALID_PLATFORM.
    cl_context_properties props[3] = {
        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(nullptr), 0
    };
    cl_int err = 0;
    cl_context ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
    test_object_failure_ret(
        ctx, err, CL_INVALID_PLATFORM,
        "clCreateContext should return CL_INVALID_PLATFORM when:\"an invalid "
        "platform object is used with the CL_CONTEXT_PLATFORM property\" using "
        "a nullptr",
        TEST_FAIL);

    // Scenario: an unrecognized property name (an arbitrary string literal
    // address) must be rejected with CL_INVALID_PROPERTY.
    props[0] = reinterpret_cast<cl_context_properties>("INVALID_PROPERTY");
    props[1] = reinterpret_cast<cl_context_properties>(nullptr);
    ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
    test_object_failure_ret(
        ctx, err, CL_INVALID_PROPERTY,
        "clCreateContext should return CL_INVALID_PROPERTY when: \"context "
        "property name in properties is not a supported property name\"",
        TEST_FAIL);

    // CL_CONTEXT_INTEROP_USER_SYNC was introduced in OpenCL 1.2, so the
    // invalid-value and duplicate-property scenarios only apply from 1.2 on.
    if (get_device_cl_version(device) >= Version(1, 2))
    {
        // Scenario: a supported property name with an invalid value (-1 is
        // neither CL_TRUE nor CL_FALSE).
        cl_context_properties invalid_value{ -1 };
        props[0] = CL_CONTEXT_INTEROP_USER_SYNC;
        props[1] = invalid_value;
        ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
        test_object_failure_ret(
            ctx, err, CL_INVALID_PROPERTY,
            "clCreateContext should return CL_INVALID_PROPERTY when: \"the "
            "value specified for a supported property name is not valid\"",
            TEST_FAIL);

        // Scenario: the same property name specified twice in the list.
        cl_bool property_value = CL_FALSE;
        cl_context_properties duplicated_property[5] = {
            CL_CONTEXT_INTEROP_USER_SYNC,
            static_cast<cl_context_properties>(property_value),
            CL_CONTEXT_INTEROP_USER_SYNC,
            static_cast<cl_context_properties>(property_value), 0
        };
        ctx = clCreateContext(duplicated_property, 1, &device, nullptr, nullptr,
                              &err);
        test_object_failure_ret(
            ctx, err, CL_INVALID_PROPERTY,
            "clCreateContext should return CL_INVALID_PROPERTY when: \"the "
            "same property name is specified more than once\"",
            TEST_FAIL);
    }

    // Scenario: devices == NULL.
    ctx = clCreateContext(nullptr, 1, nullptr, nullptr, nullptr, &err);
    test_object_failure_ret(ctx, err, CL_INVALID_VALUE,
                            "clCreateContext should return CL_INVALID_VALUE "
                            "when: \"devices is NULL\"",
                            TEST_FAIL);

    // Scenario: num_devices == 0 with a non-NULL devices pointer.
    ctx = clCreateContext(nullptr, 0, &device, nullptr, nullptr, &err);
    test_object_failure_ret(ctx, err, CL_INVALID_VALUE,
                            "clCreateContext should return CL_INVALID_VALUE "
                            "when: \"num_devices is equal to zero\"",
                            TEST_FAIL);

    // Scenario: user_data supplied without a pfn_notify callback.
    int user_data = 1; // Arbitrary non-NULL value
    ctx = clCreateContext(nullptr, 1, &device, nullptr, &user_data, &err);
    test_object_failure_ret(
        ctx, err, CL_INVALID_VALUE,
        "clCreateContext should return CL_INVALID_VALUE when: \"pfn_notify is "
        "NULL but user_data is not NULL\"",
        TEST_FAIL);

    // Scenario: a nullptr entry inside the devices list.
    cl_device_id invalid_device = nullptr;
    ctx = clCreateContext(nullptr, 1, &invalid_device, nullptr, nullptr, &err);
    test_object_failure_ret(
        ctx, err, CL_INVALID_DEVICE,
        "clCreateContext should return CL_INVALID_DEVICE when: \"any device in "
        "devices is not a valid device\" using a device set to nullptr",
        TEST_FAIL);

    return TEST_PASS;
}
/* Negative Tests for clCreateContextFromType */
REGISTER_TEST(negative_create_context_from_type)
{
    cl_platform_id platform = getPlatformFromDevice(device);
    // Five slots so two property/value pairs plus the terminating 0 fit.
    cl_context_properties props[5] = {
        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(nullptr),
        0, 0, 0
    };
    cl_int err = 0;

    // Scenario: CL_CONTEXT_PLATFORM paired with a nullptr platform value.
    cl_context ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT,
                                             nullptr, nullptr, &err);
    test_object_failure_ret(
        ctx, err, CL_INVALID_PLATFORM,
        "clCreateContextFromType should return CL_INVALID_PLATFORM when: \"an "
        "invalid platform object is used with the CL_CONTEXT_PLATFORM "
        "property\" using a nullptr",
        TEST_FAIL);

    // NOTE(review): this call is identical to the previous one — props[1] is
    // still nullptr — yet the failure message claims a "valid object that is
    // NOT a platform" is being used. Presumably props[1] should first be set
    // to a valid non-platform object here; confirm the intended scenario.
    ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
                                  nullptr, &err);
    test_object_failure_ret(
        ctx, err, CL_INVALID_PLATFORM,
        "clCreateContextFromType should return CL_INVALID_PLATFORM when: \"an "
        "invalid platform object is used with the CL_CONTEXT_PLATFORM "
        "property\" using a valid object that is NOT a platform",
        TEST_FAIL);

    // Scenario: unrecognized property name following a valid platform
    // property.
    props[1] = reinterpret_cast<cl_context_properties>(platform);
    props[2] = reinterpret_cast<cl_context_properties>("INVALID_PROPERTY");
    props[3] = reinterpret_cast<cl_context_properties>(nullptr);
    ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
                                  nullptr, &err);
    test_object_failure_ret(
        ctx, err, CL_INVALID_PROPERTY,
        "clCreateContextFromType should return CL_INVALID_PROPERTY when: "
        "\"context property name in properties is not a supported property "
        "name\"",
        TEST_FAIL);

    // CL_CONTEXT_INTEROP_USER_SYNC is OpenCL 1.2+, so these scenarios only
    // apply from 1.2 on.
    if (get_device_cl_version(device) >= Version(1, 2))
    {
        // Scenario: a supported property name with an invalid value.
        cl_context_properties invalid_value{ -1 };
        props[2] = CL_CONTEXT_INTEROP_USER_SYNC;
        props[3] = invalid_value;
        ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
                                      nullptr, &err);
        test_object_failure_ret(
            ctx, err, CL_INVALID_PROPERTY,
            "clCreateContextFromType should return CL_INVALID_PROPERTY when: "
            "\"the value specified for a supported property name is not "
            "valid\"",
            TEST_FAIL);

        // Scenario: CL_CONTEXT_PLATFORM specified twice (props[0] and
        // props[2]).
        props[2] = CL_CONTEXT_PLATFORM;
        props[3] = reinterpret_cast<cl_context_properties>(platform);
        ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
                                      nullptr, &err);
        test_object_failure_ret(
            ctx, err, CL_INVALID_PROPERTY,
            "clCreateContextFromType should return CL_INVALID_PROPERTY when: "
            "\"the same property name is specified more than once\"",
            TEST_FAIL);
    }

    // Scenario: user_data supplied without a pfn_notify callback.
    int user_data = 1; // Arbitrary non-NULL value
    ctx = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_DEFAULT, nullptr,
                                  &user_data, &err);
    test_object_failure_ret(
        ctx, err, CL_INVALID_VALUE,
        "clCreateContextFromType should return CL_INVALID_VALUE when: "
        "\"pfn_notify is NULL but user_data is not NULL\"",
        TEST_FAIL);

    // Scenario: 0 is not a valid device-type bitfield.
    cl_device_type INVALID_DEVICE_TYPE = 0;
    ctx = clCreateContextFromType(nullptr, INVALID_DEVICE_TYPE, nullptr,
                                  nullptr, &err);
    test_object_failure_ret(
        ctx, err, CL_INVALID_DEVICE_TYPE,
        "clCreateContextFromType should return CL_INVALID_DEVICE_TYPE when: "
        "\"device_type is not a valid value\"",
        TEST_FAIL);

    // Scenario: CL_DEVICE_NOT_FOUND — exercised against the first standard
    // device type the platform does not expose (if any); if every type is
    // present the loop completes without asserting.
    std::vector<cl_device_type> device_types = { CL_DEVICE_TYPE_CPU,
                                                 CL_DEVICE_TYPE_GPU,
                                                 CL_DEVICE_TYPE_ACCELERATOR };
    if (get_device_cl_version(device) >= Version(1, 2))
    {
        device_types.push_back(CL_DEVICE_TYPE_CUSTOM);
    }
    for (auto type : device_types)
    {
        clContextWrapper tmp_context =
            clCreateContextFromType(nullptr, type, nullptr, nullptr, &err);
        if (err != CL_SUCCESS)
        {
            test_object_failure_ret(
                tmp_context, err, CL_DEVICE_NOT_FOUND,
                "clCreateContextFromType should return CL_DEVICE_NOT_FOUND "
                "when: \"no devices that match device_type and property values "
                "specified in properties are currently available\"",
                TEST_FAIL);
            break;
        }
    }
    return TEST_PASS;
}
/* Negative Tests for clRetainContext */
REGISTER_TEST(negative_retain_context)
{
    // Retaining a nullptr context must be rejected with CL_INVALID_CONTEXT.
    const cl_int status = clRetainContext(nullptr);
    test_failure_error_ret(
        status, CL_INVALID_CONTEXT,
        "clRetainContext should return CL_INVALID_CONTEXT when: \"context is "
        "not a valid OpenCL context\" using a nullptr",
        TEST_FAIL);
    return TEST_PASS;
}
/* Negative Tests for clReleaseContext */
REGISTER_TEST(negative_release_context)
{
    // Releasing a nullptr context must be rejected with CL_INVALID_CONTEXT.
    const cl_int status = clReleaseContext(nullptr);
    test_failure_error_ret(
        status, CL_INVALID_CONTEXT,
        "clReleaseContext should return CL_INVALID_CONTEXT when: \"context is "
        "not a valid OpenCL context\" using a nullptr",
        TEST_FAIL);
    return TEST_PASS;
}
/* Negative Tests for clGetContextInfo */
REGISTER_TEST(negative_get_context_info)
{
    cl_uint param_value = 0;

    // Scenario: context == nullptr.
    cl_int err = clGetContextInfo(nullptr, CL_CONTEXT_REFERENCE_COUNT,
                                  sizeof(param_value), &param_value, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_CONTEXT,
        "clGetContextInfo should return CL_INVALID_CONTEXT when: \"context is "
        "not a valid context\" using a nullptr",
        TEST_FAIL);

    // Scenario: 0 is not a valid cl_context_info query.
    cl_context_info INVALID_PARAM_VALUE = 0;
    err = clGetContextInfo(context, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clGetContextInfo should return CL_INVALID_VALUE when: \"param_name is "
        "not one of the supported values\"",
        TEST_FAIL);

    // Scenario: param_value_size one byte short of the return type while
    // param_value is non-NULL.
    err = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT,
                           sizeof(param_value) - 1, &param_value, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clGetContextInfo should return CL_INVALID_VALUE when: \"size in bytes "
        "specified by param_value_size is < size of return type and "
        "param_value is not a NULL value\"",
        TEST_FAIL);
    return TEST_PASS;
}
/* Negative Tests for clSetContextDestructorCallback */
// Minimal no-op destructor callback, used only so a non-NULL pfn_notify can
// be supplied where the API requires one.
static void CL_CALLBACK callback(cl_context context, void* user_data) {}

// clSetContextDestructorCallback is OpenCL 3.0+, hence the version gate.
REGISTER_TEST_VERSION(negative_set_context_destructor_callback, Version(3, 0))
{
    // Scenario: context == nullptr.
    cl_int err = clSetContextDestructorCallback(nullptr, callback, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_CONTEXT,
        "clSetContextDestructorCallback should return CL_INVALID_CONTEXT when: "
        "\"context is not a valid context\" using a nullptr",
        TEST_FAIL);

    // Scenario: pfn_notify == NULL.
    err = clSetContextDestructorCallback(context, nullptr, nullptr);
    test_failure_error_ret(err, CL_INVALID_VALUE,
                           "clSetContextDestructorCallback should return "
                           "CL_INVALID_VALUE when: \"pfn_notify is NULL\"",
                           TEST_FAIL);
    return TEST_PASS;
}

View File

@@ -0,0 +1,526 @@
//
// Copyright (c) 2021 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/testHarness.h"
#include <vector>
/* Negative Tests for clGetDeviceInfo */
REGISTER_TEST(negative_get_device_info)
{
    cl_device_type device_type = 0;
    cl_int err(CL_SUCCESS);

    // Scenario: invalid device objects (nullptr and/or a wrong-type object,
    // depending on the --invalid-object-scenarios harness option).
    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
    {
        err = clGetDeviceInfo(invalid_device, CL_DEVICE_TYPE,
                              sizeof(device_type), &device_type, nullptr);
        test_failure_error_ret(err, CL_INVALID_DEVICE,
                               "clGetDeviceInfo should return "
                               "CL_INVALID_DEVICE when: \"device is not "
                               "a valid device\"",
                               TEST_FAIL);
    }

    // Scenario: 0 is not a valid cl_device_info query.
    constexpr cl_device_info INVALID_PARAM_VALUE = 0;
    err = clGetDeviceInfo(device, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clGetDeviceInfo should return CL_INVALID_VALUE when: \"param_name is "
        "not one of the supported values\"",
        TEST_FAIL);

    // Scenario: param_value_size of 0 is smaller than sizeof(cl_device_type)
    // while param_value is non-NULL.
    err = clGetDeviceInfo(device, CL_DEVICE_TYPE, 0, &device_type, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clGetDeviceInfo should return CL_INVALID_VALUE when: \"size in bytes "
        "specified by param_value_size is < size of return type and "
        "param_value is not a NULL value\"",
        TEST_FAIL);
    return TEST_PASS;
}
/* Negative Tests for clGetDeviceIDs */
REGISTER_TEST(negative_get_device_ids)
{
    cl_platform_id platform = getPlatformFromDevice(device);
    cl_device_id devices = nullptr;
    cl_int err(CL_SUCCESS);

    // Scenario: invalid platform objects (nullptr and/or a wrong-type object,
    // depending on the --invalid-object-scenarios harness option).
    for (auto invalid_platform : get_invalid_objects<cl_platform_id>(device))
    {
        err = clGetDeviceIDs(invalid_platform, CL_DEVICE_TYPE_DEFAULT, 1,
                             &devices, nullptr);
        test_failure_error_ret(err, CL_INVALID_PLATFORM,
                               "clGetDeviceIDs should return "
                               "CL_INVALID_PLATFORM when: \"platform is "
                               "not a valid platform\"",
                               TEST_FAIL);
    }

    // Scenario: 0 is not a valid device-type bitfield.
    cl_device_type INVALID_DEVICE_TYPE = 0;
    err = clGetDeviceIDs(platform, INVALID_DEVICE_TYPE, 1, &devices, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_DEVICE_TYPE,
        "clGetDeviceIDs should return CL_INVALID_DEVICE_TYPE when: "
        "\"device_type is not a valid value\"",
        TEST_FAIL);

    // Scenario: num_entries == 0 while devices is non-NULL.
    // Fix: the failure message previously read "clGetDeviceIDs should return
    // when:", omitting the expected error code; it now names CL_INVALID_VALUE
    // like every other scenario in this test.
    err =
        clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 0, &devices, nullptr);
    test_failure_error_ret(err, CL_INVALID_VALUE,
                           "clGetDeviceIDs should return CL_INVALID_VALUE "
                           "when: \"num_entries "
                           "is equal to zero and devices is not NULL\"",
                           TEST_FAIL);

    // Scenario: both num_devices and devices are NULL.
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, nullptr, nullptr);
    test_failure_error_ret(err, CL_INVALID_VALUE,
                           "clGetDeviceIDs should return CL_INVALID_VALUE "
                           "when: \"both num_devices and devices are NULL\"",
                           TEST_FAIL);

    // Scenario: CL_DEVICE_NOT_FOUND — only testable when at least one
    // standard device type is absent from the platform; otherwise skipped.
    devices = nullptr;
    std::vector<cl_device_type> device_types{ CL_DEVICE_TYPE_CPU,
                                              CL_DEVICE_TYPE_GPU,
                                              CL_DEVICE_TYPE_ACCELERATOR };
    if (get_device_cl_version(device) >= Version(1, 2))
    {
        device_types.push_back(CL_DEVICE_TYPE_CUSTOM);
    }
    bool platform_supports_all_device_types = true;
    for (auto device_type : device_types)
    {
        err = clGetDeviceIDs(platform, device_type, 1, &devices, nullptr);
        if (err == CL_SUCCESS)
        {
            continue;
        }
        platform_supports_all_device_types = false;
        break;
    }
    if (platform_supports_all_device_types)
    {
        log_info("Platform has every Device Type... Skipping Test\n");
    }
    else
    {
        // err here is the failure from the first missing device type.
        test_failure_error_ret(
            err, CL_DEVICE_NOT_FOUND,
            "clGetDeviceIDs should return CL_DEVICE_NOT_FOUND when: \"no "
            "OpenCL devices that matched device_type were found\"",
            TEST_FAIL);
    }
    return TEST_PASS;
}
/* Negative Tests for clGetDeviceAndHostTimer */
REGISTER_TEST_VERSION(negative_get_device_and_host_timer, Version(2, 1))
{
    // Both output pointers are deliberately nullptr for the scenarios below.
    cl_ulong *device_timestamp = nullptr, *host_timestamp = nullptr;
    cl_int err = CL_SUCCESS;

    // Scenario: invalid device objects (nullptr and/or a wrong-type object,
    // depending on the --invalid-object-scenarios harness option).
    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
    {
        err = clGetDeviceAndHostTimer(invalid_device, device_timestamp,
                                      host_timestamp);
        test_failure_error_ret(
            err, CL_INVALID_DEVICE,
            "clGetDeviceAndHostTimer should return CL_INVALID_DEVICE when: "
            "\"device is not a valid device\"",
            TEST_FAIL);
    }

    cl_platform_id platform = getPlatformFromDevice(device);
    // Initialise timer_resolution to a Non-0 value as CL2.1/2 devices must
    // support timer synchronisation
    cl_ulong timer_resolution = 1;
    auto device_version = get_device_cl_version(device);
    err =
        clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
                          sizeof(timer_resolution), &timer_resolution, nullptr);
    test_error(err, "clGetPlatformInfo");
    // A zero host-timer resolution means timer synchronization is
    // unsupported, which 2.1/2.2 platforms are required to provide.
    if (timer_resolution == 0
        && (device_version == Version(2, 1) || device_version == Version(2, 2)))
    {
        log_error("Support for device and host timer synchronization is "
                  "required for platforms supporting OpenCL 2.1 or 2.2.");
        return TEST_FAIL;
    }

    if (timer_resolution != 0)
    {
        // Timers supported: only the CL_INVALID_VALUE scenarios apply.
        log_info("Platform Supports Timers\n");
        log_info("Skipping CL_INVALID_OPERATION tests\n");
        // NOTE(review): host_timestamp is itself nullptr here, so both output
        // pointers are NULL in this "device_timestamp is NULL" scenario —
        // presumably a valid cl_ulong was intended for the other argument;
        // confirm intent.
        err = clGetDeviceAndHostTimer(device, nullptr, host_timestamp);
        test_failure_error_ret(
            err, CL_INVALID_VALUE,
            "clGetDeviceAndHostTimer should return CL_INVALID_VALUE when: "
            "\"host_timestamp or device_timestamp is NULL\" using nullptr for "
            "device_timestamp ",
            TEST_FAIL);
        err = clGetDeviceAndHostTimer(device, device_timestamp, nullptr);
        test_failure_error_ret(
            err, CL_INVALID_VALUE,
            "clGetDeviceAndHostTimer should return CL_INVALID_VALUE when: "
            "\"host_timestamp or device_timestamp is NULL\" using nullptr for "
            "host_timestamp ",
            TEST_FAIL);
    }
    else
    {
        // Timers unsupported: only the CL_INVALID_OPERATION scenario applies.
        log_info("Platform does not Support Timers\n");
        log_info("Skipping CL_INVALID_VALUE tests\n");
        err = clGetDeviceAndHostTimer(device, device_timestamp, host_timestamp);
        test_failure_error_ret(
            err, CL_INVALID_OPERATION,
            "clGetDeviceAndHostTimer should return CL_INVALID_OPERATION when: "
            "\"the platform associated with device does not support device and "
            "host timer synchronization\"",
            TEST_FAIL);
    }
    return TEST_PASS;
}
/* Negative Tests for clGetHostTimer */
REGISTER_TEST_VERSION(negative_get_host_timer, Version(2, 1))
{
    cl_ulong host_timestamp = 0;
    cl_int err = CL_SUCCESS;

    // Scenario: invalid device objects (nullptr and/or a wrong-type object,
    // depending on the --invalid-object-scenarios harness option).
    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
    {
        err = clGetHostTimer(invalid_device, &host_timestamp);
        test_failure_error_ret(err, CL_INVALID_DEVICE,
                               "clGetHostTimer should return CL_INVALID_DEVICE "
                               "when: \"device is not "
                               "a valid device\"",
                               TEST_FAIL);
    }

    cl_platform_id platform = getPlatformFromDevice(device);
    // Initialise timer_resolution to a Non-0 value as CL2.1/2 devices must
    // support timer synchronisation
    cl_ulong timer_resolution = 1;
    auto device_version = get_device_cl_version(device);
    err =
        clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
                          sizeof(timer_resolution), &timer_resolution, nullptr);
    test_error(err, "clGetPlatformInfo");
    // A zero host-timer resolution means timer synchronization is
    // unsupported, which 2.1/2.2 platforms are required to provide.
    if (timer_resolution == 0
        && (device_version == Version(2, 1) || device_version == Version(2, 2)))
    {
        log_error("Support for device and host timer synchronization is "
                  "required for platforms supporting OpenCL 2.1 or 2.2.");
        return TEST_FAIL;
    }

    if (timer_resolution != 0)
    {
        // Timers supported: only the CL_INVALID_VALUE scenario applies.
        log_info("Platform Supports Timers\n");
        log_info("Skipping CL_INVALID_OPERATION tests\n");
        err = clGetHostTimer(device, nullptr);
        test_failure_error_ret(err, CL_INVALID_VALUE,
                               "clGetHostTimer should return CL_INVALID_VALUE "
                               "when: \"host_timestamp is NULL\"",
                               TEST_FAIL);
    }
    else
    {
        // Timers unsupported: only the CL_INVALID_OPERATION scenario applies.
        log_info("Platform does not Support Timers\n");
        log_info("Skipping CL_INVALID_VALUE tests\n");
        err = clGetHostTimer(device, &host_timestamp);
        test_failure_error_ret(
            err, CL_INVALID_OPERATION,
            "clGetHostTimer should return CL_INVALID_OPERATION when: \"the "
            "platform associated with device does not support device and host "
            "timer synchronization\"",
            TEST_FAIL);
    }
    return TEST_PASS;
}
/* Negative Tests for clCreateSubDevices */
// Bitmask of the device-partitioning schemes a device reports via
// CL_DEVICE_PARTITION_PROPERTIES; values are combined with |.
enum SupportedPartitionSchemes
{
    None = 0,
    Equally = 1 << 0, // CL_DEVICE_PARTITION_EQUALLY
    Counts = 1 << 1, // CL_DEVICE_PARTITION_BY_COUNTS
    Affinity = 1 << 2, // CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN
    All_Schemes = Affinity | Counts | Equally,
};
// Queries CL_DEVICE_PARTITION_PROPERTIES and folds the returned
// cl_device_partition_property list into a SupportedPartitionSchemes
// bitmask. Returns SupportedPartitionSchemes::None when the device reports
// no partition schemes (on query failure, test_error returns TEST_FAIL).
static int get_supported_properties(cl_device_id device)
{
    // First query: size in bytes of the property list.
    size_t number_of_properties = 0;
    int err = clGetDeviceInfo(device, CL_DEVICE_PARTITION_PROPERTIES, 0,
                              nullptr, &number_of_properties);
    test_error(err, "clGetDeviceInfo");

    std::vector<cl_device_partition_property> supported_properties(
        number_of_properties / sizeof(cl_device_partition_property));
    // Fix: the second query previously dereferenced
    // supported_properties.front(), which is undefined behavior when the
    // device reports a zero-sized property list; bail out early and use
    // data() instead.
    if (supported_properties.empty())
    {
        return SupportedPartitionSchemes::None;
    }
    err = clGetDeviceInfo(device, CL_DEVICE_PARTITION_PROPERTIES,
                          number_of_properties, supported_properties.data(),
                          nullptr);
    test_error(err, "clGetDeviceInfo");

    // Map each recognized partition property to its bitmask flag; unknown
    // (e.g. vendor-specific) properties are ignored.
    int ret = SupportedPartitionSchemes::None;
    for (auto property : supported_properties)
    {
        switch (property)
        {
            case CL_DEVICE_PARTITION_EQUALLY:
                ret |= SupportedPartitionSchemes::Equally;
                break;
            case CL_DEVICE_PARTITION_BY_COUNTS:
                ret |= SupportedPartitionSchemes::Counts;
                break;
            case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN:
                ret |= SupportedPartitionSchemes::Affinity;
                break;
            default: break;
        }
    }
    return ret;
}
static std::vector<cl_device_partition_property>
get_invalid_properties(int unsupported_properties)
{
if (unsupported_properties & SupportedPartitionSchemes::Equally)
{
return { CL_DEVICE_PARTITION_EQUALLY, 1, 0 };
}
else if (unsupported_properties & SupportedPartitionSchemes::Counts)
{
return { CL_DEVICE_PARTITION_BY_COUNTS, 1,
CL_DEVICE_PARTITION_BY_COUNTS_LIST_END };
}
else if (unsupported_properties & SupportedPartitionSchemes::Affinity)
{
return { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN,
CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0 };
}
else
{
return {};
}
}
/* Convenience wrapper: query a single cl_uint-valued device info parameter.
 * On query failure the test_error macro returns the CL error code. */
static cl_uint get_uint_device_info(const cl_device_id device,
                                    const cl_device_info param_name)
{
    cl_uint value = 0;
    const cl_int status =
        clGetDeviceInfo(device, param_name, sizeof(value), &value, nullptr);
    test_error(status, "clGetDeviceInfo");
    return value;
}
/* Negative tests for clCreateSubDevices: invalid device, NULL/unsupported/
 * invalid properties, undersized num_devices, impossible partitions, and
 * out-of-range partition counts. */
REGISTER_TEST_VERSION(negative_create_sub_devices, Version(1, 2))
{
    // Determine which partition schemes the device supports; skip entirely
    // when the device cannot be partitioned at all.
    int supported_properties = get_supported_properties(device);
    if (supported_properties == SupportedPartitionSchemes::None)
    {
        printf("Device does not support creating subdevices... Skipping\n");
        return TEST_SKIPPED_ITSELF;
    }
    // Build one valid properties list from the first supported scheme, so
    // each negative case below isolates a single invalid argument.
    cl_device_partition_property properties[4] = {};
    cl_uint max_compute_units =
        get_uint_device_info(device, CL_DEVICE_MAX_COMPUTE_UNITS);
    cl_uint max_sub_devices =
        get_uint_device_info(device, CL_DEVICE_PARTITION_MAX_SUB_DEVICES);
    std::vector<cl_device_id> out_devices;
    cl_uint max_for_partition = 0;
    if (supported_properties & SupportedPartitionSchemes::Equally)
    {
        // Partition equally into sub-devices of 1 compute unit each.
        properties[0] = CL_DEVICE_PARTITION_EQUALLY;
        properties[1] = 1;
        properties[2] = 0;
        out_devices.resize(static_cast<size_t>(max_compute_units));
        max_for_partition = max_compute_units;
    }
    else if (supported_properties & SupportedPartitionSchemes::Counts)
    {
        // Request a single sub-device with 1 compute unit.
        properties[0] = CL_DEVICE_PARTITION_BY_COUNTS;
        properties[1] = 1;
        properties[2] = CL_DEVICE_PARTITION_BY_COUNTS_LIST_END;
        out_devices.resize(static_cast<size_t>(max_sub_devices));
        max_for_partition = max_sub_devices;
    }
    else
    {
        // NOTE(review): in this affinity-only path out_devices is never
        // resized (data() == nullptr, size() == 0) and max_for_partition
        // stays 0, so the CL_INVALID_VALUE "num_devices too small" case
        // below ends up passing a NULL out_devices pointer — confirm it
        // still exercises the intended error condition on such devices.
        properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
        properties[1] = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE;
        properties[2] = 0;
    }
    properties[3] = 0;
    cl_int err(CL_SUCCESS);
    // Invalid device handles must be rejected.
    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
    {
        err = clCreateSubDevices(invalid_device, properties, out_devices.size(),
                                 out_devices.data(), nullptr);
        test_failure_error_ret(err, CL_INVALID_DEVICE,
                               "clCreateSubDevices should return "
                               "CL_INVALID_DEVICE when: \"in_device "
                               "is not a valid device\"",
                               TEST_FAIL);
    }
    // A NULL properties pointer is an invalid value.
    err = clCreateSubDevices(device, nullptr, out_devices.size(),
                             out_devices.data(), nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
        "specified in properties are not valid\" using a nullptr",
        TEST_FAIL);
    // num_devices == 0 with a non-NULL out_devices is an invalid value.
    err =
        clCreateSubDevices(device, properties, 0, out_devices.data(), nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clCreateSubDevices should return CL_INVALID_VALUE when: \"out_devices "
        "is not NULL and num_devices is less than the number of sub-devices "
        "created by the partition scheme\"",
        TEST_FAIL);
    // A properties list that is valid but uses a scheme this device does not
    // support must also be rejected (only possible when some scheme is
    // unsupported).
    if (supported_properties != SupportedPartitionSchemes::All_Schemes)
    {
        std::vector<cl_device_partition_property> invalid_properties =
            get_invalid_properties(supported_properties
                                   ^ SupportedPartitionSchemes::All_Schemes);
        err =
            clCreateSubDevices(device, invalid_properties.data(),
                               out_devices.size(), out_devices.data(), nullptr);
        test_failure_error_ret(
            err, CL_INVALID_VALUE,
            "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
            "specified in properties are valid but not supported by the "
            "device\"",
            TEST_FAIL);
    }
    if (supported_properties & SupportedPartitionSchemes::Equally)
    {
        // Asking for max_compute_units units per sub-device leaves nothing to
        // partition further, so the partition itself is expected to fail.
        properties[1] = max_compute_units;
        err = clCreateSubDevices(device, properties, max_for_partition,
                                 out_devices.data(), nullptr);
        test_failure_error_ret(
            err, CL_DEVICE_PARTITION_FAILED,
            "clCreateSubDevices should return CL_DEVICE_PARTITION_FAILED when: "
            "\"the partition name is supported by the implementation but "
            "in_device could not be further partitioned\"",
            TEST_FAIL);
    }
    // An unrecognised partition name is an invalid value.
    constexpr cl_device_partition_property INVALID_PARTITION_PROPERTY =
        -1; // Aribitrary Invalid number
    properties[0] = INVALID_PARTITION_PROPERTY;
    err = clCreateSubDevices(device, properties, out_devices.size(),
                             out_devices.data(), nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
        "specified in properties are not valid\" using an invalid property",
        TEST_FAIL);
    if (supported_properties & SupportedPartitionSchemes::Counts)
    {
        // Requesting more sub-devices than the device's reported maximum.
        properties[0] = CL_DEVICE_PARTITION_BY_COUNTS;
        properties[1] = max_sub_devices + 1;
        err = clCreateSubDevices(device, properties, max_sub_devices + 1,
                                 out_devices.data(), nullptr);
        test_failure_error_ret(
            err, CL_INVALID_DEVICE_PARTITION_COUNT,
            "clCreateSubDevices should return "
            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the partition name "
            "specified in properties is CL_DEVICE_ PARTITION_BY_COUNTS and the "
            "number of sub-devices requested exceeds "
            "CL_DEVICE_PARTITION_MAX_SUB_DEVICES\"",
            TEST_FAIL);
        // A negative compute-unit count is invalid.
        properties[1] = -1;
        err = clCreateSubDevices(device, properties, out_devices.size(),
                                 out_devices.data(), nullptr);
        test_failure_error_ret(
            err, CL_INVALID_DEVICE_PARTITION_COUNT,
            "clCreateSubDevices should return "
            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the number of compute "
            "units requested for one or more sub-devices is less than zero\"",
            TEST_FAIL);
    }
    if (supported_properties & SupportedPartitionSchemes::Equally)
    {
        // Requesting more compute units per sub-device than the device has.
        properties[0] = CL_DEVICE_PARTITION_EQUALLY;
        properties[1] = max_compute_units + 1;
        err = clCreateSubDevices(device, properties, max_compute_units + 1,
                                 out_devices.data(), nullptr);
        test_failure_error_ret(
            err, CL_INVALID_DEVICE_PARTITION_COUNT,
            "clCreateSubDevices should return "
            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the total number of "
            "compute units requested exceeds CL_DEVICE_MAX_COMPUTE_UNITS for "
            "in_device\"",
            TEST_FAIL);
    }
    return TEST_PASS;
}
/* Negative Tests for clRetainDevice */
/* Negative test for clRetainDevice: every invalid device handle must be
 * rejected with CL_INVALID_DEVICE. */
REGISTER_TEST_VERSION(negative_retain_device, Version(1, 2))
{
    for (auto bad_device : get_invalid_objects<cl_device_id>(device))
    {
        const cl_int status = clRetainDevice(bad_device);
        test_failure_error_ret(status, CL_INVALID_DEVICE,
                               "clRetainDevice should return CL_INVALID_DEVICE "
                               "when: \"device is not "
                               "a valid device\"",
                               TEST_FAIL);
    }
    return TEST_PASS;
}
/* Negative Tests for clReleaseDevice */
/* Negative test for clReleaseDevice: every invalid device handle must be
 * rejected with CL_INVALID_DEVICE. */
REGISTER_TEST_VERSION(negative_release_device, Version(1, 2))
{
    for (auto bad_device : get_invalid_objects<cl_device_id>(device))
    {
        const cl_int status = clReleaseDevice(bad_device);
        test_failure_error_ret(status, CL_INVALID_DEVICE,
                               "clReleaseDevice should return "
                               "CL_INVALID_DEVICE when: \"device is not "
                               "a valid device\"",
                               TEST_FAIL);
    }
    return TEST_PASS;
}

View File

@@ -0,0 +1,97 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "testBase.h"
#include "harness/typeWrappers.h"
/* Negative tests for clEnqueueMarkerWithWaitList: NULL queue, wait-list event
 * from a different context, inconsistent num_events/event_wait_list pairs, and
 * an invalid event in the wait list. After each failing call, no output event
 * may have been created. */
REGISTER_TEST(negative_enqueue_marker_with_wait_list)
{
    // Build a second context on the same device so we can create an event
    // whose context differs from the test queue's context.
    cl_platform_id platform = getPlatformFromDevice(device);
    cl_context_properties props[3] = {
        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platform),
        0
    };
    cl_int err = CL_SUCCESS;
    clContextWrapper ctx =
        clCreateContext(props, 1, &device, nullptr, nullptr, &err);
    test_error(err, "clCreateContext failed");
    // Case 1: NULL command queue.
    cl_event ret_event = nullptr;
    err = clEnqueueMarkerWithWaitList(nullptr, 0, nullptr, &ret_event);
    test_failure_error_ret(err, CL_INVALID_COMMAND_QUEUE,
                           "clEnqueueMarkerWithWaitList should return "
                           "CL_INVALID_COMMAND_QUEUE when: \"command_queue is "
                           "not a valid host command-queue\" using a nullptr",
                           TEST_FAIL);
    test_assert_error(ret_event == nullptr,
                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
                      "should be created");
    // Case 2: wait-list event created in a different context than the queue.
    clEventWrapper different_ctx_event = clCreateUserEvent(ctx, &err);
    test_error(err, "clCreateUserEvent failed");
    err =
        clEnqueueMarkerWithWaitList(queue, 1, &different_ctx_event, &ret_event);
    test_failure_error_ret(
        err, CL_INVALID_CONTEXT,
        "clEnqueueMarkerWithWaitList should return CL_INVALID_CONTEXT when: "
        "\"The context of both the command queue and the events in ret_event "
        "wait list are not the same\"",
        TEST_FAIL);
    test_assert_error(ret_event == nullptr,
                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
                      "should be created");
    // Case 3: num_events_in_wait_list > 0 but event_wait_list is NULL.
    err = clEnqueueMarkerWithWaitList(queue, 1, nullptr, &ret_event);
    test_failure_error_ret(
        err, CL_INVALID_EVENT_WAIT_LIST,
        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
        "when: \"num_events_in_wait_list > 0 but event_wait_list is NULL\"",
        TEST_FAIL);
    test_assert_error(ret_event == nullptr,
                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
                      "should be created");
    // Case 4: num_events_in_wait_list == 0 but event_wait_list is non-NULL.
    clEventWrapper event = clCreateUserEvent(context, &err);
    test_error(err, "clCreateUserEvent failed");
    err = clEnqueueMarkerWithWaitList(queue, 0, &event, &ret_event);
    test_failure_error_ret(
        err, CL_INVALID_EVENT_WAIT_LIST,
        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
        "when: \"num_events_in_wait_list is 0 but event_wait_list is not "
        "NULL\"",
        TEST_FAIL);
    test_assert_error(ret_event == nullptr,
                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
                      "should be created");
    // Case 5: wait list containing an invalid (NULL) event.
    cl_event invalid_event_wait_list[] = { nullptr };
    err = clEnqueueMarkerWithWaitList(queue, 1, invalid_event_wait_list,
                                      &ret_event);
    test_failure_error_ret(
        err, CL_INVALID_EVENT_WAIT_LIST,
        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
        "when: \"event objects in event_wait_list are not valid events\"",
        TEST_FAIL);
    test_assert_error(ret_event == nullptr,
                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
                      "should be created");
    return TEST_PASS;
}

View File

@@ -40,9 +40,20 @@ REGISTER_TEST(negative_get_platform_info)
{
cl_platform_id platform = getPlatformFromDevice(device);
cl_int err(CL_SUCCESS);
for (auto invalid_platform : get_invalid_objects<cl_platform_id>(device))
{
err = clGetPlatformInfo(invalid_platform, CL_PLATFORM_VERSION,
sizeof(char*), nullptr, nullptr);
test_failure_error_ret(err, CL_INVALID_PLATFORM,
"clGetPlatformInfo should return "
"CL_INVALID_PLATFORM when: \"platform "
"is not a valid platform\"",
TEST_FAIL);
}
constexpr cl_platform_info INVALID_PARAM_VALUE = 0;
cl_int err =
clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
err = clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name "

View File

@@ -348,18 +348,27 @@ REGISTER_TEST_VERSION(negative_set_default_device_command_queue, Version(2, 1))
return TEST_FAIL;
}
{
cl_command_queue_properties queue_properties;
err =
clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(queue_properties), &queue_properties, NULL);
test_error(err, "Unable to query CL_DEVICE_QUEUE_PROPERTIES");
if (queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
{
constexpr cl_queue_properties props[] = {
CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
};
clCommandQueueWrapper not_on_device_queue =
clCreateCommandQueueWithProperties(context, device, props, &err);
clCreateCommandQueueWithProperties(context, device, props,
&err);
test_error_fail(err, "clCreateCommandQueueWithProperties failed");
err = clSetDefaultDeviceCommandQueue(context, device,
not_on_device_queue);
if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
{
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
log_error(
"ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
"clSetDefaultDeviceCommandQueue should return "
"CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: "
"\"command_queue is not a valid command-queue for "
@@ -369,6 +378,7 @@ REGISTER_TEST_VERSION(negative_set_default_device_command_queue, Version(2, 1))
__FILE__, __LINE__);
}
}
}
return TEST_PASS;
}

View File

@@ -1526,7 +1526,7 @@ REGISTER_TEST(min_max_constant_buffer_size)
size_t threads[1], localThreads[1];
cl_int *constantData, *resultData;
cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize;
int i;
size_t i;
cl_event event;
cl_int event_status;
MTdata d;
@@ -1556,6 +1556,8 @@ REGISTER_TEST(min_max_constant_buffer_size)
maxAllocSize = get_device_info_max_mem_alloc_size(
device, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
log_info("Reported max alloc size of %" PRIu64 " bytes.\n",
(uint64_t)maxAllocSize);
if (maxSize > maxAllocSize) maxSize = maxAllocSize;
@@ -1590,7 +1592,7 @@ REGISTER_TEST(min_max_constant_buffer_size)
return EXIT_FAILURE;
}
for (i = 0; i < (int)(numberOfInts); i++)
for (i = 0; i < numberOfInts; i++)
constantData[i] = (int)genrand_int32(d);
clMemWrapper streams[3];
@@ -1678,11 +1680,11 @@ REGISTER_TEST(min_max_constant_buffer_size)
sizeToAllocate, resultData, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
for (i = 0; i < (int)(numberOfInts); i++)
for (i = 0; i < numberOfInts; i++)
if (constantData[i] != resultData[i])
{
log_error("Data failed to verify: constantData[%d]=%d != "
"resultData[%d]=%d\n",
log_error("Data failed to verify: constantData[%zu]=%d != "
"resultData[%zu]=%d\n",
i, constantData[i], i, resultData[i]);
free(constantData);
free(resultData);

View File

@@ -487,10 +487,10 @@ compare_kernel_with_expected(cl_context context, cl_device_id device,
int failed_tests = 0;
clKernelWrapper kernel;
clProgramWrapper program;
cl_int err = create_single_kernel_helper_with_build_options(
context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
&kernel_src, "get_kernel_arg_info",
get_build_options(device).c_str());
test_error(err, "create_single_kernel_helper_with_build_options");
test_error(err, "create_single_kernel_helper");
for (size_t i = 0; i < expected_args.size(); ++i)
{
KernelArgInfo actual;
@@ -874,11 +874,10 @@ static int test_null_param(cl_context context, cl_device_id device,
{
clProgramWrapper program;
clKernelWrapper kernel;
cl_int err = create_single_kernel_helper_with_build_options(
context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
&kernel_src, "get_kernel_arg_info",
get_build_options(device).c_str());
test_error_ret(err, "create_single_kernel_helper_with_build_options",
TEST_FAIL);
test_error_ret(err, "create_single_kernel_helper", TEST_FAIL);
err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
CL_KERNEL_ARG_ADDRESS_QUALIFIER, 0, nullptr,
@@ -916,12 +915,11 @@ static int test_arg_name_size(cl_context context, cl_device_id device,
char arg_return[sizeof(KERNEL_ARGUMENT_NAME) + 1];
clProgramWrapper program;
clKernelWrapper kernel;
cl_int err = create_single_kernel_helper_with_build_options(
context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
&kernel_src, "get_kernel_arg_info",
get_build_options(device).c_str());
test_error_ret(err, "create_single_kernel_helper_with_build_options",
TEST_FAIL);
test_error_ret(err, "create_single_kernel_helper", TEST_FAIL);
err =
clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, CL_KERNEL_ARG_NAME,

View File

@@ -382,17 +382,18 @@ REGISTER_TEST(null_required_work_group_size)
clMemWrapper dst;
dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
nullptr, &error);
test_error(error, "clCreateBuffer failed");
struct KernelAttribInfo
{
std::string str;
cl_uint max_dim;
cl_int wgs[3];
cl_uint min_dim;
};
std::vector<KernelAttribInfo> attribs;
attribs.push_back({ "__attribute__((reqd_work_group_size(2,1,1)))", 1 });
attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,1)))", 2 });
attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,4)))", 3 });
attribs.push_back({ { 2, 1, 1 }, 1 });
attribs.push_back({ { 2, 3, 1 }, 2 });
attribs.push_back({ { 2, 3, 4 }, 3 });
const std::string body_str = R"(
__kernel void wg_size(__global int* dst)
@@ -409,7 +410,11 @@ REGISTER_TEST(null_required_work_group_size)
for (auto& attrib : attribs)
{
const std::string source_str = attrib.str + body_str;
const std::string attrib_str = "__attribute__((reqd_work_group_size("
+ std::to_string(attrib.wgs[0]) + ","
+ std::to_string(attrib.wgs[1]) + ","
+ std::to_string(attrib.wgs[2]) + ")))";
const std::string source_str = attrib_str + body_str;
const char* source = source_str.c_str();
clProgramWrapper program;
@@ -421,21 +426,19 @@ REGISTER_TEST(null_required_work_group_size)
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
test_error(error, "clSetKernelArg failed");
for (cl_uint work_dim = 1; work_dim <= attrib.max_dim; work_dim++)
for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++)
{
const cl_int expected[3] = { 2, work_dim >= 2 ? 3 : 1,
work_dim >= 3 ? 4 : 1 };
const size_t test_work_group_size =
expected[0] * expected[1] * expected[2];
if ((size_t)expected[0] > device_max_work_item_sizes[0]
|| (size_t)expected[1] > device_max_work_item_sizes[1]
|| (size_t)expected[2] > device_max_work_item_sizes[2]
attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2];
if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0]
|| (size_t)attrib.wgs[1] > device_max_work_item_sizes[1]
|| (size_t)attrib.wgs[2] > device_max_work_item_sizes[2]
|| test_work_group_size > device_max_work_group_size)
{
log_info("Skipping test for work_dim = %u: required work group "
"size (%i, %i, %i) (total %zu) exceeds device max "
"work group size (%zu, %zu, %zu) (total %zu)\n",
work_dim, expected[0], expected[1], expected[2],
work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
test_work_group_size, device_max_work_item_sizes[0],
device_max_work_item_sizes[1],
device_max_work_item_sizes[2],
@@ -444,8 +447,10 @@ REGISTER_TEST(null_required_work_group_size)
}
const cl_int zero = 0;
error = clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
sizeof(expected), 0, nullptr, nullptr);
error =
clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
sizeof(attrib.wgs), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr,
@@ -458,12 +463,12 @@ REGISTER_TEST(null_required_work_group_size)
results, 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
if (results[0] != expected[0] || results[1] != expected[1]
|| results[2] != expected[2])
if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1]
|| results[2] != attrib.wgs[2])
{
log_error("Executed local size mismatch with work_dim = %u: "
"Expected (%d,%d,%d) got (%d,%d,%d)\n",
work_dim, expected[0], expected[1], expected[2],
work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
results[0], results[1], results[2]);
return TEST_FAIL;
}
@@ -477,15 +482,15 @@ REGISTER_TEST(null_required_work_group_size)
test_error(error,
"clGetKernelSuggestedLocalWorkSizeKHR failed");
if ((cl_int)suggested[0] != expected[0]
|| (cl_int)suggested[1] != expected[1]
|| (cl_int)suggested[2] != expected[2])
if (suggested[0] != (size_t)attrib.wgs[0]
|| suggested[1] != (size_t)attrib.wgs[1]
|| suggested[2] != (size_t)attrib.wgs[2])
{
log_error("Suggested local size mismatch with work_dim = "
"%u: Expected (%d,%d,%d) got (%d,%d,%d)\n",
work_dim, expected[0], expected[1], expected[2],
(cl_int)suggested[0], (cl_int)suggested[1],
(cl_int)suggested[2]);
"%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n",
work_dim, attrib.wgs[0], attrib.wgs[1],
attrib.wgs[2], suggested[0], suggested[1],
suggested[2]);
return TEST_FAIL;
}
}

View File

@@ -982,6 +982,12 @@ REGISTER_TEST(negative_invalid_arg_index)
REGISTER_TEST(negative_invalid_arg_size_local)
{
if (true)
{
log_info("Disabling this test temporarily, see internal issue 374.\n");
return TEST_SKIPPED_ITSELF;
}
cl_int error = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper local_arg_kernel;

View File

@@ -86,7 +86,8 @@ REGISTER_TEST(queue_hint)
clProgramWrapper program;
clKernelWrapper kernel;
err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, queue_hint_test_kernel, "vec_cpy", NULL);
err = create_single_kernel_helper(context, &program, &kernel, 1,
queue_hint_test_kernel, "vec_cpy");
if (err != 0)
{
return err;

View File

@@ -757,7 +757,7 @@ REGISTER_TEST(spirv_query_dependencies)
}
for (const auto& extension_dep : it->second.extensions)
{
log_error("Checked for SPIR-V extension %s.n",
log_error("Checked for SPIR-V extension %s.\n",
extension_dep.c_str());
}
return TEST_FAIL;

View File

@@ -188,9 +188,17 @@ REGISTER_TEST(arrayimagecopy)
{
PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
int error = test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
test_arrayimagecopy_single_format);
if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
{
error |= test_arrayimagecommon(
device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
test_arrayimagecopy_single_format);
}
return error;
}
@@ -198,7 +206,15 @@ REGISTER_TEST(arrayimagecopy3d)
{
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
int error = test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D,
test_arrayimagecopy_single_format);
if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
{
error |= test_arrayimagecommon(
device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D,
test_arrayimagecopy_single_format);
}
return error;
}

View File

@@ -72,16 +72,36 @@ static int test_arrayreadwrite_impl(cl_device_id device, cl_context context,
err = clEnqueueWriteBuffer(
queue, buffer, CL_TRUE, offset * sizeof(cl_uint),
sizeof(cl_uint) * cb, &reference_vals[offset], 0, nullptr, nullptr);
if (flags & CL_MEM_IMMUTABLE_EXT)
{
test_failure_error_ret(err, CL_INVALID_OPERATION,
"clEnqueueWriteBuffer is expected to fail "
"with CL_INVALID_OPERATION when the buffer "
"is created with CL_MEM_IMMUTABLE_EXT",
TEST_FAIL);
}
else
{
test_error(err, "clEnqueueWriteBuffer failed");
}
err = clEnqueueReadBuffer(
queue, buffer, CL_TRUE, offset * sizeof(cl_uint),
cb * sizeof(cl_uint), &outptr[offset], 0, nullptr, nullptr);
test_error(err, "clEnqueueReadBuffer failed");
const cl_uint* expected_buffer_values = nullptr;
if (flags & CL_MEM_IMMUTABLE_EXT)
{
expected_buffer_values = inptr.data();
}
else
{
expected_buffer_values = reference_vals.data();
}
for (int j = offset; j < offset + cb; j++)
{
if (reference_vals[j] != outptr[j])
if (expected_buffer_values[j] != outptr[j])
{
log_error("ARRAY read, write test failed\n");
err = -1;
@@ -105,3 +125,11 @@ REGISTER_TEST(arrayreadwrite)
return test_arrayreadwrite_impl(device, context, queue, num_elements,
CL_MEM_READ_WRITE);
}
REGISTER_TEST(immutable_arrayreadwrite)
{
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
return test_arrayreadwrite_impl(device, context, queue, num_elements,
CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR);
}

View File

@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "harness/compat.h"
#include "errorHelpers.h"
#include <stdio.h>
#include <stdlib.h>
@@ -194,6 +195,43 @@ int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, si
return 0;
}
int immutable_copy_region(size_t src, size_t soffset[3], size_t sregion[3],
size_t dst, size_t doffset[3], size_t dregion[3])
{
// Copy between cl buffers.
size_t src_slice_pitch =
(width[src] * height[src] != 1) ? width[src] * height[src] : 0;
size_t dst_slice_pitch =
(width[dst] * height[dst] != 1) ? width[dst] * height[dst] : 0;
size_t src_row_pitch = width[src];
cl_int err;
if (check_overlap_rect(soffset, doffset, sregion, src_row_pitch,
src_slice_pitch))
{
log_info("Copy overlap reported, skipping copy buffer rect\n");
return CL_SUCCESS;
}
else
{
err = clEnqueueCopyBufferRect(gQueue, buffer[src], buffer[dst], soffset,
doffset, sregion, /*dregion,*/
width[src], src_slice_pitch, width[dst],
dst_slice_pitch, 0, nullptr, nullptr);
if (err != CL_INVALID_OPERATION)
{
log_error(
"clEnqueueCopyBufferRect should return "
"CL_INVALID_OPERATION but returned %s between %zu and %zu",
IGetErrorString(err), src, dst);
return TEST_FAIL;
}
}
return TEST_PASS;
}
// This function compares the destination region in the buffer pointed
// to by device, to the source region of the specified verify buffer.
int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) {
@@ -337,6 +375,32 @@ int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, s
return 0;
}
int immutable_write_region(size_t src, size_t soffset[3], size_t sregion[3],
size_t dst, size_t doffset[3], size_t dregion[3])
{
initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt);
size_t src_slice_pitch =
(width[src] * height[src] != 1) ? width[src] * height[src] : 0;
size_t dst_slice_pitch =
(width[dst] * height[dst] != 1) ? width[dst] * height[dst] : 0;
cl_int error = clEnqueueWriteBufferRect(
gQueue, buffer[dst], CL_TRUE, doffset, soffset, dregion, width[dst],
dst_slice_pitch, width[src], src_slice_pitch, tmp_buffer, 0, nullptr,
nullptr);
if (error != CL_INVALID_OPERATION)
{
log_error("clEnqueueWriteBufferRect should return CL_INVALID_OPERATION "
"but retured %s between %zu and %zu",
IGetErrorString(error), src, dst);
return TEST_FAIL;
}
return TEST_PASS;
}
void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
{
free( data );
@@ -591,3 +655,16 @@ REGISTER_TEST(bufferreadwriterect)
device, context, queue, num_elements,
CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, test_functions);
}
REGISTER_TEST(immutable_bufferreadwriterect)
{
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
TestFunctions test_functions;
test_functions.copy = immutable_copy_region;
test_functions.read = read_verify_region;
test_functions.write = immutable_write_region;
return test_bufferreadwriterect_impl(
device, context, queue, num_elements,
CL_MEM_USE_HOST_PTR | CL_MEM_IMMUTABLE_EXT, test_functions);
}

View File

@@ -103,11 +103,11 @@ REGISTER_TEST_VERSION(enqueued_local_size, Version(2, 0))
std::string cl_std = "-cl-std=CL";
cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
err = create_single_kernel_helper_with_build_options(
err = create_single_kernel_helper(
context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
"test_enqueued_local_size_1d", cl_std.c_str());
test_error(err, "create_single_kernel_helper failed");
err = create_single_kernel_helper_with_build_options(
err = create_single_kernel_helper(
context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
"test_enqueued_local_size_2d", cl_std.c_str());
test_error(err, "create_single_kernel_helper failed");

View File

@@ -27,6 +27,82 @@ using test_function_t = int (*)(cl_device_id, cl_context, cl_command_queue,
cl_mem_flags, cl_mem_flags, cl_mem_object_type,
const cl_image_format *);
static int test_negative_imagearraycopy_single_format(
cl_device_id device, cl_context context, cl_command_queue queue,
cl_mem_flags image_flags, cl_mem_flags buffer_flags,
cl_mem_object_type image_type, const cl_image_format *format)
{
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
imgptr{ nullptr, free };
clMemWrapper image;
clMemWrapper buffer;
const size_t img_width = 512;
const size_t img_height = 512;
const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
size_t elem_size;
size_t buffer_size;
cl_int err;
RandomSeed seed(gRandomSeed);
const size_t origin[3] = { 0, 0, 0 },
region[3] = { img_width, img_height, img_depth };
log_info("Testing %s %s\n",
GetChannelOrderName(format->image_channel_order),
GetChannelTypeName(format->image_channel_data_type));
elem_size = get_pixel_size(format);
buffer_size =
sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
if (image_flags & CL_MEM_USE_HOST_PTR || image_flags & CL_MEM_COPY_HOST_PTR)
{
imgptr.reset(static_cast<cl_uchar *>(
create_random_data(kUChar, seed, buffer_size)));
}
bufptr.reset(
static_cast<cl_uchar *>(create_random_data(kUChar, seed, buffer_size)));
if (CL_MEM_OBJECT_IMAGE2D == image_type)
{
image = create_image_2d(context, image_flags, format, img_width,
img_height, 0, imgptr.get(), &err);
}
else
{
image =
create_image_3d(context, image_flags, format, img_width, img_height,
img_depth, 0, 0, imgptr.get(), &err);
}
test_error(err, "create_image_xd failed");
if (!(image_flags & CL_MEM_USE_HOST_PTR
|| image_flags & CL_MEM_COPY_HOST_PTR))
{
imgptr.reset(static_cast<cl_uchar *>(
create_random_data(kUChar, seed, buffer_size)));
err = clEnqueueWriteImage(queue, image, CL_TRUE, origin, region, 0, 0,
imgptr.get(), 0, nullptr, nullptr);
test_error(err, "clEnqueueWriteImage failed");
}
buffer =
clCreateBuffer(context, buffer_flags, buffer_size, bufptr.get(), &err);
test_error(err, "clCreateBuffer failed");
err = clEnqueueCopyImageToBuffer(queue, image, buffer, origin, region, 0, 0,
nullptr, nullptr);
test_failure_error_ret(
err, CL_INVALID_OPERATION,
"clEnqueueCopyImageToBuffer should return CL_INVALID_OPERATION when: "
"\" dst_buffer is created with CL_MEM_IMMUTABLE_EXT flag\"",
TEST_FAIL);
return TEST_PASS;
}
static int test_imagearraycopy_single_format(
cl_device_id device, cl_context context, cl_command_queue queue,
cl_mem_flags image_flags, cl_mem_flags buffer_flags,
@@ -188,9 +264,18 @@ REGISTER_TEST(imagearraycopy)
{
PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
int error = test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
test_imagearraycopy_single_format);
if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
{
error |= test_imagearraycommon(
device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
test_imagearraycopy_single_format);
}
return error;
}
@@ -198,7 +283,38 @@ REGISTER_TEST(imagearraycopy3d)
{
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
int error = test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D,
test_imagearraycopy_single_format);
if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
{
error |= test_imagearraycommon(
device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D,
test_imagearraycopy_single_format);
}
return error;
}
REGISTER_TEST(negative_imagearraycopy)
{
PASSIVE_REQUIRE_IMAGE_SUPPORT(device);
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_OBJECT_IMAGE2D,
test_negative_imagearraycopy_single_format);
}
REGISTER_TEST(negative_imagearraycopy3d)
{
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device);
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_OBJECT_IMAGE3D,
test_negative_imagearraycopy_single_format);
}

View File

@@ -50,8 +50,8 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty
{
sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
}
cl_int err = create_single_kernel_helper_with_build_options(
context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr);
cl_int err = create_single_kernel_helper(context, &p, &k, 4,
sizeof_kernel_code, "test_sizeof");
test_error(err, "Failed to build kernel/program.");
m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err );

View File

@@ -19,19 +19,24 @@
#include "testBase.h"
const cl_mem_flags flag_set[] = {
CL_MEM_ALLOC_HOST_PTR,
const cl_mem_flags flag_set[] = { CL_MEM_ALLOC_HOST_PTR,
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_USE_HOST_PTR,
CL_MEM_COPY_HOST_PTR,
0
};
0,
CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR,
CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR
| CL_MEM_ALLOC_HOST_PTR };
const char* flag_set_names[] = {
"CL_MEM_ALLOC_HOST_PTR",
"CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
"CL_MEM_USE_HOST_PTR",
"CL_MEM_COPY_HOST_PTR",
"0"
"0",
"CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR",
"CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR",
"CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR",
};
int main( int argc, const char *argv[] )

View File

@@ -25,6 +25,6 @@
extern const cl_mem_flags flag_set[];
extern const char* flag_set_names[];
#define NUM_FLAGS 5
#define NUM_FLAGS 8
#endif // _testBase_h

View File

@@ -39,7 +39,8 @@ static int verify_copy_buffer(int *inptr, int *outptr, int n)
using alignedOwningPtr = std::unique_ptr<cl_int[], decltype(&align_free)>;
static int test_copy( cl_command_queue queue, cl_context context, int num_elements, MTdata d )
static int test_copy(cl_device_id device, cl_command_queue queue,
cl_context context, int num_elements, MTdata d)
{
clMemWrapper buffers[2];
cl_int err = CL_SUCCESS;
@@ -76,10 +77,19 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
return TEST_FAIL;
}
const bool has_immutable_memory_extension =
is_extension_available(device, "cl_ext_immutable_memory_objects");
for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
{
if (((flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
|| (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
&& !has_immutable_memory_extension)
{
continue;
}
log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
for (int i = 0; i < num_elements; i++)
@@ -89,7 +99,6 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
reference_ptr[i] = (int)genrand_int32(d);
}
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
sizeof(cl_int) * num_elements,
@@ -116,7 +125,9 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
return TEST_FAIL;
}
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)
&& !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
{
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
sizeof(cl_int) * num_elements,
reference_ptr.get(), 0, nullptr,
@@ -130,11 +141,44 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
err = clEnqueueCopyBuffer(queue, buffers[0], buffers[1], 0, 0,
sizeof(cl_int) * num_elements, 0, nullptr,
nullptr);
if ( err != CL_SUCCESS ){
if ((flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
{
if (err != CL_INVALID_OPERATION)
{
test_failure_error_ret(err, CL_INVALID_OPERATION,
"clEnqueueCopyBuffer should return "
"CL_INVALID_OPERATION when: "
"\"dst_buffer is created with "
"CL_MEM_IMMUTABLE_EXT flag\"",
TEST_FAIL);
return TEST_FAIL;
}
}
else if (err != CL_SUCCESS)
{
print_error(err, "clCopyArray failed\n");
return TEST_FAIL;
}
err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
sizeof(int) * num_elements, out_ptr.get(),
0, nullptr, nullptr);
if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
num_elements))
{
log_error("test failed\n");
return TEST_FAIL;
}
else
{
log_info("test passed\n");
}
// Reset out_ptr
for (int i = 0; i < num_elements; i++)
{
out_ptr[i] = (int)0xdeadbeef; // seed with incorrect data
}
err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
sizeof(int) * num_elements, out_ptr.get(),
0, nullptr, nullptr);
@@ -143,14 +187,20 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
return TEST_FAIL;
}
if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
num_elements))
int *target_buffer = reference_ptr.get();
if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
log_error( " test failed\n" );
target_buffer = invalid_ptr.get();
}
if (verify_copy_buffer(target_buffer, out_ptr.get(), num_elements))
{
log_error("test failed\n");
return TEST_FAIL;
}
else{
log_info( " test passed\n" );
else
{
log_info("test passed\n");
}
} // dst flags
} // src flags
@@ -160,7 +210,10 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
} // end test_copy()
static int testPartialCopy( cl_command_queue queue, cl_context context, int num_elements, cl_uint srcStart, cl_uint dstStart, int size, MTdata d )
static int testPartialCopy(cl_device_id device, cl_command_queue queue,
cl_context context, int num_elements,
cl_uint srcStart, cl_uint dstStart, int size,
MTdata d)
{
clMemWrapper buffers[2];
cl_int err = CL_SUCCESS;
@@ -197,10 +250,19 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
return TEST_FAIL;
}
const bool has_immutable_memory_extension =
is_extension_available(device, "cl_ext_immutable_memory_objects");
for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
{
if (((flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
|| (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
&& !has_immutable_memory_extension)
{
continue;
}
log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
for (int i = 0; i < num_elements; i++)
@@ -236,7 +298,9 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
return TEST_FAIL;
}
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)){
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)
&& !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
{
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
sizeof(cl_int) * num_elements,
reference_ptr.get(), 0, nullptr,
@@ -251,27 +315,72 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
queue, buffers[0], buffers[1], srcStart * sizeof(cl_int),
dstStart * sizeof(cl_int), sizeof(cl_int) * size, 0, nullptr,
nullptr);
if ( err != CL_SUCCESS){
print_error(err, "clEnqueueCopyBuffer failed\n");
if ((flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
{
if (err != CL_INVALID_OPERATION)
{
test_failure_error_ret(err, CL_INVALID_OPERATION,
"clEnqueueCopyBuffer should return "
"CL_INVALID_OPERATION when: "
"\"dst_buffer is created with "
"CL_MEM_IMMUTABLE_EXT flag\"",
TEST_FAIL);
}
}
else if (err != CL_SUCCESS)
{
print_error(err, "clCopyArray failed\n");
return TEST_FAIL;
}
err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
sizeof(int) * num_elements, out_ptr.get(),
0, nullptr, nullptr);
if (err != CL_SUCCESS)
{
print_error(err, "clEnqueueReadBuffer failed\n");
return TEST_FAIL;
}
if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
num_elements))
{
log_error("test failed\n");
return TEST_FAIL;
}
else
{
log_info("test passed\n");
}
// Reset out_ptr
for (int i = 0; i < num_elements; i++)
{
out_ptr[i] = (int)0xdeadbeef; // seed with incorrect data
}
err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
sizeof(int) * num_elements, out_ptr.get(),
0, nullptr, nullptr);
if ( err != CL_SUCCESS){
if (err != CL_SUCCESS)
{
print_error(err, "clEnqueueReadBuffer failed\n");
return TEST_FAIL;
}
if (verify_copy_buffer(reference_ptr.get() + srcStart,
out_ptr.get() + dstStart, size))
cl_int *target_buffer = reference_ptr.get() + srcStart;
if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
log_error("buffer_COPY test failed\n");
target_buffer = invalid_ptr.get();
}
if (verify_copy_buffer(target_buffer, out_ptr.get() + dstStart,
size))
{
log_error("test failed\n");
return TEST_FAIL;
}
else{
log_info("buffer_COPY test passed\n");
else
{
log_info("test passed\n");
}
} // dst mem flags
} // src mem flags
@@ -289,7 +398,7 @@ REGISTER_TEST(buffer_copy)
// test the preset size
log_info( "set size: %d: ", num_elements );
if (test_copy(queue, context, num_elements, d) != TEST_PASS)
if (test_copy(device, queue, context, num_elements, d) != TEST_PASS)
{
err++;
}
@@ -298,7 +407,7 @@ REGISTER_TEST(buffer_copy)
for ( i = 0; i < 8; i++ ){
size = (int)get_random_float(2.f,131072.f, d);
log_info( "random size: %d: ", size );
if (test_copy(queue, context, size, d) != TEST_PASS)
if (test_copy(device, queue, context, size, d) != TEST_PASS)
{
err++;
}
@@ -324,8 +433,8 @@ REGISTER_TEST(buffer_partial_copy)
size = (int)get_random_float( 8.f, (float)(num_elements - srcStart), d );
dstStart = (cl_uint)get_random_float( 0.f, (float)(num_elements - size), d );
log_info( "random partial copy from %d to %d, size: %d: ", (int)srcStart, (int)dstStart, size );
if (testPartialCopy(queue, context, num_elements, srcStart, dstStart,
size, d)
if (testPartialCopy(device, queue, context, num_elements, srcStart,
dstStart, size, d)
!= TEST_PASS)
{
err++;

View File

@@ -598,6 +598,12 @@ static int test_buffer_fill(cl_device_id deviceID, cl_context context,
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clEventWrapper event[2];
clMemWrapper buffers[2];
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
@@ -721,6 +727,12 @@ REGISTER_TEST(buffer_fill_struct)
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clProgramWrapper program;
clKernelWrapper kernel;
log_info("Testing with cl_mem_flags: %s\n",

View File

@@ -592,6 +592,12 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffer;
outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
if ( ! outptr[i] ){
@@ -671,6 +677,101 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c
} // end test_buffer_map_read()
int test_immutable_buffer_map(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements,
size_t size, const char *type, int loops)
{
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
void *outptr[5];
cl_int err;
int i;
size_t ptrSizes[5];
int total_errors = 0;
MTdataHolder mtdata(gRandomSeed);
size_t min_alignment = get_min_alignment(context);
ptrSizes[0] = size;
ptrSizes[1] = ptrSizes[0] << 1;
ptrSizes[2] = ptrSizes[1] << 1;
ptrSizes[3] = ptrSizes[2] << 1;
ptrSizes[4] = ptrSizes[3] << 1;
// embedded devices don't support long/ulong so skip over
if (!gHasLong && strstr(type, "long")) return TEST_SKIPPED_ITSELF;
for (i = 0; i < loops; i++)
{
for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Testing writing from immutable flags
if (!(flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT))
{
continue;
}
clMemWrapper buffer;
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
if (!outptr[i])
{
log_error(" unable to allocate %d bytes of memory\n",
(int)ptrSizes[i] * num_elements);
return TEST_FAIL;
}
generate_random_data(kUChar, ptrSizes[i] * num_elements, mtdata,
outptr[i]);
buffer =
clCreateBuffer(context, flag_set[src_flag_id],
ptrSizes[i] * num_elements, outptr[i], &err);
if (nullptr == buffer || CL_SUCCESS != err)
{
print_error(err, "clCreateBuffer failed\n");
align_free(outptr[i]);
return TEST_FAIL;
}
void *mappedPtr = clEnqueueMapBuffer(
queue, buffer, CL_TRUE, CL_MAP_READ, 0,
ptrSizes[i] * num_elements, 0, nullptr, nullptr, &err);
if (err != CL_SUCCESS)
{
print_error(err, "clEnqueueMapBuffer failed");
align_free(outptr[i]);
return TEST_FAIL;
}
if (memcmp(mappedPtr, outptr[i], ptrSizes[i] * num_elements) != 0)
{
log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
1 << i, flag_set_names[src_flag_id]);
total_errors++;
}
else
{
log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
1 << i, flag_set_names[src_flag_id]);
}
err = clEnqueueUnmapMemObject(queue, buffer, mappedPtr, 0, nullptr,
nullptr);
test_error(err, "clEnqueueUnmapMemObject failed");
// If we are using the outptr[i] as backing via USE_HOST_PTR we need
// to make sure we are done before freeing.
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR))
{
err = clFinish(queue);
test_error(err, "clFinish failed");
}
align_free(outptr[i]);
}
} // cl_mem_flags
return total_errors > 0 ? TEST_FAIL : TEST_PASS;
}
#define DECLARE_LOCK_TEST(type, realType) \
REGISTER_TEST(buffer_map_read_##type) \
@@ -691,6 +792,28 @@ DECLARE_LOCK_TEST(char, cl_char)
DECLARE_LOCK_TEST(uchar, cl_uchar)
DECLARE_LOCK_TEST(float, cl_float)
#undef DECLARE_LOCK_TEST
#define DECLARE_LOCK_TEST(type, realType) \
REGISTER_TEST(immutable_buffer_map_##type) \
{ \
return test_immutable_buffer_map(device, context, queue, num_elements, \
sizeof(realType), #type, 5); \
}
DECLARE_LOCK_TEST(int, cl_int)
DECLARE_LOCK_TEST(uint, cl_uint)
DECLARE_LOCK_TEST(long, cl_long)
DECLARE_LOCK_TEST(ulong, cl_ulong)
DECLARE_LOCK_TEST(short, cl_short)
DECLARE_LOCK_TEST(ushort, cl_ushort)
DECLARE_LOCK_TEST(char, cl_char)
DECLARE_LOCK_TEST(uchar, cl_uchar)
DECLARE_LOCK_TEST(float, cl_float)
#undef DECLARE_LOCK_TEST
REGISTER_TEST(buffer_map_read_struct)
{
int (*foo)(void *,int);

View File

@@ -666,6 +666,12 @@ static int test_buffer_read(cl_device_id deviceID, cl_context context,
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffer;
outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
if ( ! outptr[i] ){
@@ -809,6 +815,12 @@ static int test_buffer_read_async(cl_device_id deviceID, cl_context context,
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffer;
clEventWrapper event;
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
@@ -946,6 +958,12 @@ static int test_buffer_read_array_barrier(
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffer;
clEventWrapper event;
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);

View File

@@ -660,8 +660,18 @@ static int test_buffer_write(cl_device_id deviceID, cl_context context,
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
{
// Skip immutable memory flags
if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffers[2];
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
@@ -834,8 +844,19 @@ REGISTER_TEST(buffer_write_struct)
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
{
// Skip immutable memory flags
if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffers[2];
inptr[i] = (TestStruct *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
@@ -996,7 +1017,17 @@ static int test_buffer_write_array_async(
ptrSizes[4] = ptrSizes[3] << 1;
for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
// Skip immutable memory flags
if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
loops = ( loops < 5 ? loops : 5 );
@@ -1974,3 +2005,256 @@ REGISTER_TEST(buffer_write_async_ulong)
} // end test_buffer_ulong_write_array_async()
int immutable_test_buffer_write(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements,
size_t size, const char *type, int loops,
void *inptr[5], const char *kernelCode[],
const char *kernelName[],
int (*fn)(void *, void *, int), MTdataHolder &d)
{
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
void *outptr[5];
clProgramWrapper program[5];
clKernelWrapper kernel[5];
size_t ptrSizes[5];
size_t global_work_size[3];
cl_int err;
int i;
int src_flag_id, dst_flag_id;
int total_errors = 0;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (size_t)num_elements;
ptrSizes[0] = size;
ptrSizes[1] = ptrSizes[0] << 1;
ptrSizes[2] = ptrSizes[1] << 1;
ptrSizes[3] = ptrSizes[2] << 1;
ptrSizes[4] = ptrSizes[3] << 1;
loops = (loops < 5 ? loops : 5);
for (i = 0; i < loops; i++)
{
err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
&kernelCode[i], kernelName[i]);
if (err)
{
log_error(" Error creating program for %s\n", type);
return TEST_FAIL;
}
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Testing writing from immutable flags
if (!(flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT))
{
continue;
}
for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
{
// Skip immutable memory flags
if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
cl_mem_flags src_mem_flags = flag_set[src_flag_id];
cl_mem_flags dst_mem_flags = flag_set[dst_flag_id];
clMemWrapper buffers[2];
buffers[0] =
clCreateBuffer(context, src_mem_flags,
ptrSizes[i] * num_elements, inptr[i], &err);
if (nullptr == buffers[0] || CL_SUCCESS != err)
{
align_free(outptr[i]);
print_error(err, " clCreateBuffer failed\n");
return TEST_FAIL;
}
if (!strcmp(type, "half"))
{
outptr[i] = align_malloc(ptrSizes[i] * (num_elements * 2),
min_alignment);
buffers[1] = clCreateBuffer(context, dst_mem_flags,
ptrSizes[i] * 2 * num_elements,
outptr[i], &err);
}
else
{
outptr[i] =
align_malloc(ptrSizes[i] * num_elements, min_alignment);
if ((dst_mem_flags & CL_MEM_USE_HOST_PTR)
|| (dst_mem_flags & CL_MEM_COPY_HOST_PTR))
buffers[1] = clCreateBuffer(context, dst_mem_flags,
ptrSizes[i] * num_elements,
outptr[i], &err);
else
buffers[1] = clCreateBuffer(context, dst_mem_flags,
ptrSizes[i] * num_elements,
nullptr, &err);
}
if (err)
{
align_free(outptr[i]);
print_error(err, " clCreateBuffer failed\n");
return TEST_FAIL;
}
err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
(void *)&buffers[0]);
err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
(void *)&buffers[1]);
if (err != CL_SUCCESS)
{
align_free(outptr[i]);
print_error(err, " clSetKernelArg failed");
return TEST_FAIL;
}
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, nullptr,
global_work_size, nullptr, 0,
nullptr, nullptr);
if (err != CL_SUCCESS)
{
print_error(err, " clEnqueueNDRangeKernel failed");
align_free(outptr[i]);
return TEST_FAIL;
}
err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
ptrSizes[i] * num_elements, outptr[i],
0, nullptr, nullptr);
if (err != CL_SUCCESS)
{
align_free(outptr[i]);
print_error(err, " clEnqueueReadBuffer failed");
return TEST_FAIL;
}
if (fn(inptr[i], outptr[i],
(int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0])))
{
log_error(
" %s%d test failed. cl_mem_flags src: %s, dst: %s\n",
type, 1 << i, flag_set_names[src_flag_id],
flag_set_names[dst_flag_id]);
total_errors++;
}
else
{
log_info(
" %s%d test passed. cl_mem_flags src: %s, dst: %s\n",
type, 1 << i, flag_set_names[src_flag_id],
flag_set_names[dst_flag_id]);
}
// cleanup
align_free(outptr[i]);
}
} // dst cl_mem_flag
} // src cl_mem_flag
return total_errors;
} // end test_buffer_write()
REGISTER_TEST(write_from_immutable_buffer_to_buffer)
{
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
static const char *immutable_buffer_write_int_kernel_code[] = {
R"(
__kernel void test_buffer_write_int(constant int *src, __global int *dst)
{
int tid = get_global_id(0);
dst[tid] = src[tid];
})",
R"(
__kernel void test_buffer_write_int2(constant int2 *src, __global int2 *dst)
{
int tid = get_global_id(0);
dst[tid] = src[tid];
})",
R"(
__kernel void test_buffer_write_int4(constant int4 *src, __global int4 *dst)
{
int tid = get_global_id(0);
dst[tid] = src[tid];
})",
R"(
__kernel void test_buffer_write_int8(constant int8 *src, __global int8 *dst)
{
int tid = get_global_id(0);
dst[tid] = src[tid];
})",
R"(
__kernel void test_buffer_write_int16(constant int16 *src, __global int16 *dst)
{
int tid = get_global_id(0);
dst[tid] = src[tid];
})"
};
static const char *immutable_int_kernel_name[] = {
"test_buffer_write_int", "test_buffer_write_int2",
"test_buffer_write_int4", "test_buffer_write_int8",
"test_buffer_write_int16"
};
if (gTestMap)
{
log_error("Immutable buffers cannot be mapped with CL_MEM_WRITE\n");
return TEST_SKIPPED_ITSELF;
}
int *inptr[5];
size_t ptrSizes[5];
int i, err;
cl_uint j;
int (*foo)(void *, void *, int);
MTdataHolder d(gRandomSeed);
size_t min_alignment = get_min_alignment(context);
foo = verify_write_int;
ptrSizes[0] = sizeof(cl_int);
ptrSizes[1] = ptrSizes[0] << 1;
ptrSizes[2] = ptrSizes[1] << 1;
ptrSizes[3] = ptrSizes[2] << 1;
ptrSizes[4] = ptrSizes[3] << 1;
for (i = 0; i < 5; i++)
{
inptr[i] =
(int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
for (j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++)
inptr[i][j] = (int)genrand_int32(d);
}
err = immutable_test_buffer_write(device, context, queue, num_elements,
sizeof(cl_int), "int", 5, (void **)inptr,
immutable_buffer_write_int_kernel_code,
immutable_int_kernel_name, foo, d);
for (i = 0; i < 5; i++)
{
align_free((void *)inptr[i]);
}
return err;
}

View File

@@ -194,9 +194,9 @@ template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;
template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; }
template <> cl_half AtomicTypeExtendedInfo<cl_half>::MinValue()
template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MinValue()
{
return cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode);
return -CL_HALF_MAX;
}
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
{
@@ -217,9 +217,9 @@ template <> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue()
}
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
template <> cl_half AtomicTypeExtendedInfo<cl_half>::MaxValue()
template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MaxValue()
{
return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode);
return CL_HALF_MAX;
}
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
{

View File

@@ -22,12 +22,14 @@
#include "host_atomics.h"
#include "CL/cl_half.h"
#include <algorithm>
#include <iomanip>
#include <limits>
#include <sstream>
#include <vector>
#include "CL/cl_half.h"
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
#define MAX_HOST_THREADS GetThreadCount()
@@ -76,6 +78,10 @@ extern int
extern cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device
extern cl_device_fp_config gDoubleFPConfig;
extern cl_device_fp_config gFloatFPConfig;
extern cl_device_fp_config gHalfFPConfig;
extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
@@ -91,6 +97,37 @@ extern cl_int getSupportedMemoryOrdersAndScopes(
cl_device_id device, std::vector<TExplicitMemoryOrderType> &memoryOrders,
std::vector<TExplicitMemoryScopeType> &memoryScopes);
union FloatIntUnion {
float f;
uint32_t i;
};
template <typename HostDataType> bool is_qnan(const HostDataType &value)
{
if constexpr (std::is_same_v<HostDataType, float>)
{
FloatIntUnion u;
u.f = value;
if ((u.i & 0x7F800000) != 0x7F800000) return false;
return (u.i & 0x00400000) != 0;
}
else
return std::isnan(value);
}
template <typename HostDataType> bool is_snan(const HostDataType &value)
{
if constexpr (std::is_same_v<HostDataType, float>)
{
FloatIntUnion u;
u.f = value;
if ((u.i & 0x7F800000) != 0x7F800000) return false;
return (u.i & 0x00400000) == 0;
}
else
return std::isnan(value);
}
class AtomicTypeInfo {
public:
TExplicitAtomicType _type;
@@ -154,12 +191,12 @@ public:
return 0;
}
CBasicTest(TExplicitAtomicType dataType, bool useSVM)
: CTest(), _maxDeviceThreads(MAX_DEVICE_THREADS), _dataType(dataType),
_useSVM(useSVM), _startValue(255), _localMemory(false),
_declaredInProgram(false), _usedInFunction(false),
_genericAddrSpace(false), _oldValueCheck(true),
_localRefValues(false), _maxGroupSize(0), _passCount(0),
_iterations(gInternalIterations)
: CTest(), _dataType(dataType), _useSVM(useSVM), _startValue(255),
_localMemory(false), _declaredInProgram(false),
_usedInFunction(false), _genericAddrSpace(false),
_oldValueCheck(true), _localRefValues(false), _maxGroupSize(0),
_passCount(0), _iterations(gInternalIterations),
_maxDeviceThreads(MAX_DEVICE_THREADS), _deviceThreads(0)
{}
virtual ~CBasicTest()
{
@@ -178,12 +215,15 @@ public:
{
return false;
}
virtual bool
IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
const std::vector<HostDataType> &startRefValues,
cl_uint whichDestValue)
{
return expected != testValues[whichDestValue];
return expected
!= static_cast<HostDataType>(testValues[whichDestValue]);
}
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d)
@@ -239,12 +279,12 @@ public:
cl_command_queue queue)
{
int error = 0;
DeclaredInProgram(false);
SetDeclaredInProgram(false);
EXECUTE_TEST(error,
ExecuteForEachPointerType(deviceID, context, queue));
if (!UseSVM())
{
DeclaredInProgram(true);
SetDeclaredInProgram(true);
EXECUTE_TEST(error,
ExecuteForEachPointerType(deviceID, context, queue));
}
@@ -255,13 +295,13 @@ public:
cl_command_queue queue)
{
int error = 0;
if (_maxDeviceThreads > 0 && !UseSVM())
if (_deviceThreads > 0 && !UseSVM())
{
SetLocalMemory(true);
EXECUTE_TEST(
error, ExecuteForEachDeclarationType(deviceID, context, queue));
}
if (_maxDeviceThreads + MaxHostThreads() > 0)
if (_deviceThreads + MaxHostThreads() > 0)
{
SetLocalMemory(false);
EXECUTE_TEST(
@@ -270,7 +310,7 @@ public:
return error;
}
virtual int Execute(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
cl_command_queue queue, int num_elements) override
{
if (sizeof(HostAtomicType) != DataType().Size(deviceID))
{
@@ -310,7 +350,12 @@ public:
if (UseSVM()) return 0;
_maxDeviceThreads = 0;
}
if (_maxDeviceThreads + MaxHostThreads() == 0) return 0;
_deviceThreads = (num_elements > 0)
? std::min(cl_uint(num_elements), _maxDeviceThreads)
: _maxDeviceThreads;
if (_deviceThreads + MaxHostThreads() == 0) return 0;
return ExecuteForEachParameterSet(deviceID, context, queue);
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
@@ -323,7 +368,7 @@ public:
{
return AtomicTypeExtendedInfo<HostDataType>(_dataType);
}
cl_uint _maxDeviceThreads;
virtual cl_uint MaxHostThreads()
{
if (UseSVM() || gHost)
@@ -420,7 +465,7 @@ public:
HostDataType StartValue() { return _startValue; }
void SetLocalMemory(bool local) { _localMemory = local; }
bool LocalMemory() { return _localMemory; }
void DeclaredInProgram(bool declaredInProgram)
void SetDeclaredInProgram(bool declaredInProgram)
{
_declaredInProgram = declaredInProgram;
}
@@ -477,6 +522,8 @@ private:
cl_uint _currentGroupSize;
cl_uint _passCount;
const cl_int _iterations;
cl_uint _maxDeviceThreads;
cl_uint _deviceThreads;
};
template <typename HostAtomicType, typename HostDataType>
@@ -702,6 +749,28 @@ public:
cl_context context,
cl_command_queue queue)
{
// Comparator for orders and scopes.
const auto checkValidity = [](TExplicitMemoryOrderType success,
TExplicitMemoryOrderType failure,
TExplicitMemoryScopeType scope) {
// Both memory order arguments must be set (or neither).
if ((success == MEMORY_ORDER_EMPTY || failure == MEMORY_ORDER_EMPTY)
&& success != failure)
return false;
// Memory scope without memory order is disallowed.
if (success == MEMORY_ORDER_EMPTY && scope != MEMORY_SCOPE_EMPTY)
return false;
// Failure must not be release or acq_rel.
if (failure == MEMORY_ORDER_RELEASE
|| failure == MEMORY_ORDER_ACQ_REL)
return false;
// Failure must not be stronger than success.
return failure <= success;
};
// repeat test for each reasonable memory order/scope combination
std::vector<TExplicitMemoryOrderType> memoryOrder;
std::vector<TExplicitMemoryScopeType> memoryScope;
@@ -719,16 +788,10 @@ public:
{
for (unsigned si = 0; si < memoryScope.size(); si++)
{
if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY
|| memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
&& memoryOrder[oi] != memoryOrder[o2i])
continue; // both memory order arguments must be set (or
// none)
if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY
|| memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
&& memoryScope[si] != MEMORY_SCOPE_EMPTY)
continue; // memory scope without memory order is not
// allowed
if (!checkValidity(memoryOrder[oi], memoryOrder[o2i],
memoryScope[si]))
continue;
MemoryOrder(memoryOrder[oi]);
MemoryOrder2(memoryOrder[o2i]);
MemoryScope(memoryScope[si]);
@@ -895,12 +958,25 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
+ ss.str() + "] = {\n";
ss.str("");
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
== TYPE_ATOMIC_FLOAT)
ss << std::setprecision(10) << _startValue;
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
== TYPE_ATOMIC_HALF)
ss << cl_half_to_float(static_cast<cl_half>(_startValue));
if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (std::isinf(_startValue))
ss << (_startValue < 0 ? "-" : "") << "INFINITY";
else if (std::isnan(_startValue))
ss << "0.0 / 0.0";
else
ss << std::setprecision(
std::numeric_limits<HostDataType>::max_digits10)
<< _startValue;
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
ss << std::setprecision(std::numeric_limits<float>::max_digits10)
<< cl_half_to_float(_startValue);
}
else
ss << _startValue;
@@ -1137,7 +1213,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
MTdata d;
size_t typeSize = DataType().Size(deviceID);
deviceThreadCount = _maxDeviceThreads;
deviceThreadCount = _deviceThreads;
hostThreadCount = MaxHostThreads();
threadCount = deviceThreadCount + hostThreadCount;
@@ -1202,9 +1278,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems)
+ FunctionCode() + KernelCode(numDestItems);
programLine = programSource.c_str();
if (create_single_kernel_helper_with_build_options(
context, &program, &kernel, 1, &programLine,
"test_atomic_kernel", gOldAPI ? "" : nullptr))
if (create_single_kernel_helper(context, &program, &kernel, 1,
&programLine, "test_atomic_kernel"))
{
return -1;
}
@@ -1289,7 +1364,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
numDestItems = NumResults(threadCount, deviceID);
destItems.resize(numDestItems);
for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue;
for (cl_uint i = 0; i < numDestItems; i++)
destItems[i] = static_cast<HostAtomicType>(_startValue);
// Create main buffer with atomic variables (array size dependent on
// particular test)
@@ -1462,12 +1538,13 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
startRefValues.size() ? &startRefValues[0] : 0, i))
break; // no expected value function provided
if (IsTestNotAsExpected(expected, destItems, i))
if (IsTestNotAsExpected(expected, destItems, startRefValues, i))
{
std::stringstream logLine;
logLine << "ERROR: Result " << i
<< " from kernel does not validate! (should be " << expected
<< ", was " << destItems[i] << ")\n";
<< ", was " << static_cast<HostDataType>(destItems[i])
<< ")\n";
log_error("%s", logLine.str().c_str());
for (i = 0; i < threadCount; i++)
{
@@ -1534,7 +1611,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
// clEnqueueNDRangeKernel
{
/* Re-write the starting value */
for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue;
for (size_t i = 0; i < numDestItems; i++)
destItems[i] = static_cast<HostAtomicType>(_startValue);
refValues[0] = 0;
if (deviceThreadCount > 0)
{

View File

@@ -18,12 +18,15 @@
#include "harness/testHarness.h"
#include <mutex>
#include "CL/cl_half.h"
#ifdef WIN32
#include "Windows.h"
#endif
extern cl_half_rounding_mode gHalfRoundingMode;
//flag for test verification (good test should discover non-atomic functions and fail)
//#define NON_ATOMIC_FUNCTIONS
@@ -37,6 +40,93 @@ enum TExplicitMemoryOrderType
MEMORY_ORDER_SEQ_CST
};
// Wrapper class that gives half-precision (fp16) values value semantics on
// the host. The raw binary16 bits are stored in a cl_half; all arithmetic is
// carried out in float and the result is converted back to half using
// gHalfRoundingMode (the device's fp16 rounding mode, queried at init).
class HostHalf {
public:
    // Convert from semantic values (rounded to half via gHalfRoundingMode).
    HostHalf(cl_uint value = 0)
        : value(
            cl_half_from_float(static_cast<float>(value), gHalfRoundingMode))
    {}
    HostHalf(int value): HostHalf(static_cast<cl_uint>(value)) {}
    HostHalf(float value): value(cl_half_from_float(value, gHalfRoundingMode))
    {}
    // Note: double is narrowed to float first, then rounded to half.
    HostHalf(double value): HostHalf(static_cast<float>(value)) {}

    // Convert to semantic values.
    // NOTE: the cl_uint conversion truncates toward zero (static_cast from
    // float); it does not round.
    operator cl_uint() const
    {
        return static_cast<cl_uint>(cl_half_to_float(value));
    }
    operator float() const { return cl_half_to_float(value); }
    operator double() const
    {
        return static_cast<double>(cl_half_to_float(value));
    }

    // Construct from bit representation
    HostHalf(cl_half value): value(value) {}
    // Get the underlying bit representation
    operator cl_half() const { return value; }

    // Unary negation: widen to float, negate, round back to half.
    HostHalf operator-() const
    {
        return HostHalf(
            cl_half_from_float(-cl_half_to_float(value), gHalfRoundingMode));
    }

// Comparison/arithmetic operators: both operands are widened to float, the
// operation is performed in float, and (for arithmetic results) the RetType
// constructor rounds back to half.
#define GENERIC_OP(RetType, op)                                                \
    RetType operator op(const HostHalf &other) const                           \
    {                                                                          \
        return RetType(cl_half_to_float(value)                                 \
                           op cl_half_to_float(other.value));                  \
    }
    GENERIC_OP(bool, ==)
    GENERIC_OP(bool, !=)
    GENERIC_OP(bool, <)
    GENERIC_OP(bool, <=)
    GENERIC_OP(bool, >)
    GENERIC_OP(bool, >=)
    GENERIC_OP(HostHalf, +)
    GENERIC_OP(HostHalf, -)
    GENERIC_OP(HostHalf, *)
    GENERIC_OP(HostHalf, /)
#undef GENERIC_OP

// Compound assignment: compute in float, store the re-rounded half bits.
#define INPLACE_OP(op)                                                         \
    HostHalf &operator op##=(const HostHalf &other)                            \
    {                                                                          \
        value = cl_half_from_float(cl_half_to_float(value)                     \
                                       op cl_half_to_float(other.value),       \
                                   gHalfRoundingMode);                         \
        return *this;                                                          \
    }
    INPLACE_OP(+)
    INPLACE_OP(-)
    INPLACE_OP(*)
    INPLACE_OP(/)
#undef INPLACE_OP

    // Stream output prints the widened float value.
    friend std::ostream &operator<<(std::ostream &os, const HostHalf &hh)
    {
        float f = cl_half_to_float(hh.value);
        os << f;
        return os;
    }

private:
    cl_half value; // raw IEEE 754 binary16 bits
};
namespace std {
// Overload of abs for the HostHalf wrapper so generic test code using
// std::abs keeps working for half-precision values.
// NOTE(review): adding new overloads to namespace std is technically
// undefined behavior per [namespace.std]; consider declaring abs next to
// HostHalf so it is found via ADL instead — TODO confirm no caller relies
// on qualified std::abs(...).
inline HostHalf abs(const HostHalf &value)
{
    return value < HostHalf(0) ? -value : value;
}
} // namespace std
// host atomic types (applicable for atomic functions supported on host OS)
#ifdef WIN32
#define HOST_ATOMIC_INT unsigned long
@@ -73,7 +163,7 @@ enum TExplicitMemoryOrderType
#define HOST_UINT cl_uint
#define HOST_LONG cl_long
#define HOST_ULONG cl_ulong
#define HOST_HALF cl_half
#define HOST_HALF HostHalf
#define HOST_FLOAT cl_float
#define HOST_DOUBLE cl_double
@@ -91,6 +181,18 @@ enum TExplicitMemoryOrderType
extern cl_half_rounding_mode gHalfRoundingMode;
// True when HostAtomicType is one of the floating-point host atomic storage
// types (half/float/double). The host atomic helpers use this to select the
// mutex-guarded floating-point emulation path.
template <typename HostAtomicType>
constexpr bool is_host_atomic_fp_v =
    std::disjunction_v<std::is_same<HostAtomicType, HOST_ATOMIC_HALF>,
                       std::is_same<HostAtomicType, HOST_ATOMIC_FLOAT>,
                       std::is_same<HostAtomicType, HOST_ATOMIC_DOUBLE>>;

// True when HostDataType is one of the floating-point host data types
// (HOST_HALF is the HostHalf wrapper; float and double are the cl_* types).
template <typename HostDataType>
constexpr bool is_host_fp_v =
    std::disjunction_v<std::is_same<HostDataType, HOST_HALF>,
                       std::is_same<HostDataType, HOST_FLOAT>,
                       std::is_same<HostDataType, HOST_DOUBLE>>;
// host atomic functions
void host_atomic_thread_fence(TExplicitMemoryOrderType order);
@@ -98,24 +200,13 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
if constexpr (is_host_atomic_fp_v<AtomicType>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)),
gHalfRoundingMode);
return old_value;
}
else if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a += c;
CorrespondingType new_value = old_value + c;
*a = static_cast<AtomicType>(new_value);
return old_value;
}
else
@@ -135,21 +226,13 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
if constexpr (is_host_atomic_fp_v<AtomicType>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a -= c;
return old_value;
}
else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
gHalfRoundingMode);
CorrespondingType new_value = old_value - c;
*a = static_cast<AtomicType>(new_value);
return old_value;
}
else
@@ -170,12 +253,14 @@ CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType
TExplicitMemoryOrderType order)
{
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
if (sizeof(CorrespondingType) == 2)
return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a), c);
if constexpr (sizeof(CorrespondingType) == 2)
return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a),
*reinterpret_cast<SHORT *>(&c));
else
return InterlockedExchange(reinterpret_cast<volatile LONG *>(a), c);
return InterlockedExchange(reinterpret_cast<volatile LONG *>(a),
*reinterpret_cast<LONG *>(&c));
#elif defined(__GNUC__)
return __sync_lock_test_and_set(a, c);
return __sync_lock_test_and_set(a, *reinterpret_cast<AtomicType *>(&c));
#else
log_info("Host function not implemented: atomic_exchange\n");
return 0;
@@ -192,30 +277,14 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
TExplicitMemoryOrderType order_failure)
{
CorrespondingType tmp;
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
if constexpr (is_host_atomic_fp_v<AtomicType>)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile cl_half *>(a);
if (cl_half_to_float(tmp) == cl_half_to_float(*expected))
{
*reinterpret_cast<volatile cl_half *>(a) = desired;
return true;
}
*expected = tmp;
}
else if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile float *>(a);
tmp = static_cast<CorrespondingType>(*a);
if (tmp == *expected)
{
*a = desired;
*a = static_cast<AtomicType>(desired);
return true;
}
*expected = tmp;
@@ -241,8 +310,8 @@ CorrespondingType host_atomic_load(volatile AtomicType *a,
TExplicitMemoryOrderType order)
{
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
if (sizeof(CorrespondingType) == 2)
auto prev = InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
if constexpr (sizeof(CorrespondingType) == 2)
return InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
else
return InterlockedExchangeAdd(reinterpret_cast<volatile LONG *>(a), 0);
#elif defined(__GNUC__)

View File

@@ -31,11 +31,14 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio
int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device
cl_device_fp_config gDoubleFPConfig = 0;
cl_device_fp_config gFloatFPConfig = 0;
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
bool gFloatAtomicsSupported = false;
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
cl_device_fp_config gHalfFPConfig = 0;
test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device);
@@ -134,13 +137,17 @@ test_status InitCL(cl_device_id device) {
if (is_extension_available(device, "cl_ext_float_atomics"))
{
gFloatAtomicsSupported = true;
if (is_extension_available(device, "cl_khr_fp64"))
{
cl_int error = clGetDeviceInfo(
device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
sizeof(gDoubleFPConfig), &gDoubleFPConfig,
NULL);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
}
cl_int error = clGetDeviceInfo(
@@ -148,6 +155,13 @@ test_status InitCL(cl_device_id device) {
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
sizeof(gFloatFPConfig), &gFloatFPConfig, NULL);
test_error_ret(
error,
"Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)",
TEST_FAIL);
if (is_extension_available(device, "cl_khr_fp16"))
{
cl_int error = clGetDeviceInfo(
@@ -170,6 +184,11 @@ test_status InitCL(cl_device_id device) {
log_error("Error while acquiring half rounding mode\n");
return TEST_FAIL;
}
error =
clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG,
sizeof(gHalfFPConfig), &gHalfFPConfig, NULL);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -26,9 +26,9 @@ class DirectXWrapper {
public:
DirectXWrapper();
ID3D12Device* getDXDevice() const;
ID3D12CommandQueue* getDXCommandQueue() const;
ID3D12CommandAllocator* getDXCommandAllocator() const;
[[nodiscard]] ID3D12Device* getDXDevice() const;
[[nodiscard]] ID3D12CommandQueue* getDXCommandQueue() const;
[[nodiscard]] ID3D12CommandAllocator* getDXCommandAllocator() const;
protected:
ComPtr<ID3D12Device> dx_device = nullptr;
@@ -39,7 +39,7 @@ protected:
class DirectXFenceWrapper {
public:
DirectXFenceWrapper(ID3D12Device* dx_device);
ID3D12Fence* operator*() const { return dx_fence.Get(); }
[[nodiscard]] ID3D12Fence* get() const { return dx_fence.Get(); }
private:
ComPtr<ID3D12Fence> dx_fence = nullptr;

View File

@@ -104,7 +104,8 @@
VK_FUNC_DECL(vkGetImageSubresourceLayout) \
VK_FUNC_DECL(vkCreateDebugUtilsMessengerEXT) \
VK_FUNC_DECL(vkDestroyDebugUtilsMessengerEXT) \
VK_FUNC_DECL(vkGetPhysicalDeviceExternalBufferProperties)
VK_FUNC_DECL(vkGetPhysicalDeviceExternalBufferProperties) \
VK_FUNC_DECL(vkGetPhysicalDeviceFeatures2)
#define VK_WINDOWS_FUNC_LIST \
VK_FUNC_DECL(vkGetMemoryWin32HandleKHR) \
VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR) \
@@ -209,5 +210,6 @@
#define vkDestroyDebugUtilsMessengerEXT _vkDestroyDebugUtilsMessengerEXT
#define vkGetPhysicalDeviceExternalBufferProperties \
_vkGetPhysicalDeviceExternalBufferProperties
#define vkGetPhysicalDeviceFeatures2 _vkGetPhysicalDeviceFeatures2
#endif //_vulkan_api_list_hpp_

View File

@@ -147,6 +147,7 @@ VulkanInstance::VulkanInstance(bool useValidationLayers)
// return WAIVED;
}
VK_GET_NULL_INSTANCE_PROC_ADDR(vkGetPhysicalDeviceFeatures2);
VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceVersion);
VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceLayerProperties);
VK_GET_NULL_INSTANCE_PROC_ADDR(vkCreateInstance);
@@ -612,7 +613,8 @@ VulkanDevice::VulkanDevice(const VulkanDevice &device)
VulkanDevice::VulkanDevice(
const VulkanPhysicalDevice &physicalDevice,
const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap)
const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap,
bool useShaderInt8)
: m_physicalDevice(physicalDevice), m_vkDevice(NULL)
{
uint32_t maxQueueCount = 0;
@@ -676,7 +678,55 @@ VulkanDevice::VulkanDevice(
enabledExtensionNameList.data();
vkDeviceCreateInfo.pEnabledFeatures = NULL;
if (useShaderInt8)
{
VkPhysicalDeviceShaderFloat16Int8Features int8Features{};
int8Features.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
VkPhysicalDevice8BitStorageFeatures storage8Features{};
storage8Features.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
int8Features.pNext = &storage8Features;
VkPhysicalDeviceFeatures2 features2{};
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = &int8Features;
vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
if (!int8Features.shaderInt8
|| !storage8Features.storageBuffer8BitAccess)
{
throw std::runtime_error("shaderInt8 not supported!\n");
}
VkPhysicalDevice8BitStorageFeatures storage8Enable{};
storage8Enable.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
storage8Enable.storageBuffer8BitAccess = VK_TRUE;
VkPhysicalDeviceShaderFloat16Int8Features int8Enable{};
int8Enable.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
int8Enable.shaderInt8 = VK_TRUE;
int8Enable.pNext = &storage8Enable;
vkDeviceCreateInfo.pNext = &int8Enable;
enabledExtensionNameList.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
vkDeviceCreateInfo.ppEnabledExtensionNames =
enabledExtensionNameList.data();
vkDeviceCreateInfo.enabledExtensionCount =
(uint32_t)enabledExtensionNameList.size();
vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);
}
else
{
vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);
}
for (uint32_t qfIdx = 0;
qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size();
@@ -1071,7 +1121,8 @@ VulkanComputePipeline::VulkanComputePipeline(
VulkanComputePipeline::VulkanComputePipeline(
const VulkanDevice &device, const VulkanPipelineLayout &pipelineLayout,
const VulkanShaderModule &shaderModule, const std::string &entryFuncName)
const VulkanShaderModule &shaderModule, const std::string &entryFuncName,
const VkSpecializationInfo *spec)
: VulkanPipeline(device)
{
VkPipelineShaderStageCreateInfo vkPipelineShaderStageCreateInfo = {};
@@ -1084,6 +1135,8 @@ VulkanComputePipeline::VulkanComputePipeline(
vkPipelineShaderStageCreateInfo.pName = entryFuncName.c_str();
vkPipelineShaderStageCreateInfo.pSpecializationInfo = NULL;
if (spec) vkPipelineShaderStageCreateInfo.pSpecializationInfo = spec;
VkComputePipelineCreateInfo vkComputePipelineCreateInfo = {};
vkComputePipelineCreateInfo.sType =
VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;

View File

@@ -148,7 +148,8 @@ public:
VulkanDevice(
const VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(),
const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap =
getDefaultVulkanQueueFamilyToQueueCountMap());
getDefaultVulkanQueueFamilyToQueueCountMap(),
bool useShaderInt8 = false);
virtual ~VulkanDevice();
const VulkanPhysicalDevice &getPhysicalDevice() const;
VulkanQueue &
@@ -296,7 +297,8 @@ public:
VulkanComputePipeline(const VulkanDevice &device,
const VulkanPipelineLayout &pipelineLayout,
const VulkanShaderModule &shaderModule,
const std::string &entryFuncName = "main");
const std::string &entryFuncName = "main",
const VkSpecializationInfo *spec = nullptr);
virtual ~VulkanComputePipeline();
VulkanPipelineBindPoint getPipelineBindPoint() const;
};

View File

@@ -16,6 +16,7 @@
#include "testBase.h"
#include "harness/testHarness.h"
#include "harness/parseParameters.h"
#include "harness/stringHelpers.h"
#include <array>
#include <memory>
@@ -1009,3 +1010,113 @@ REGISTER_TEST(get_program_build_info)
return 0;
}
// Builds a kernel whose function name is `length` characters long, then
// verifies the name round-trips through CL_KERNEL_FUNCTION_NAME and, for
// online compilation, through the compile/link/clCreateKernel path as well.
//
// Returns TEST_PASS on success, TEST_FAIL on any API error or name mismatch.
cl_int test_kernel_name_len(cl_context context, cl_device_id device,
                            const cl_uint length)
{
    cl_int error = CL_SUCCESS;

    // Generate a deterministic kernel name of the requested length.
    std::string buf = { "abcdefghijklmnopqrstuvwxyz" };
    std::string name;
    name.reserve(length);
    for (cl_uint i = 0; i < length; ++i) name += buf[i % buf.size()];

    const char *sample_name_size_test_kernel = R"(
    __kernel void %s(int src, __global int *dst)
    {
        dst[0]=src;
    }
    )";

    std::string program_source =
        str_sprintf(std::string(sample_name_size_test_kernel), name.c_str());
    const char *ptr = program_source.c_str();

    {
        clProgramWrapper program;
        clKernelWrapper kernel;
        error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
                                            name.c_str());
        if (error != CL_SUCCESS)
        {
            // length is cl_uint, so print it with the unsigned conversion.
            log_error("ERROR: Unable to create program with length of "
                      "kernel name "
                      "%u : %s! (%s from %s:%d)\n",
                      length, name.c_str(), IGetErrorString(error), __FILE__,
                      __LINE__);
            return TEST_FAIL;
        }

        // Query the kernel name back and compare against what we compiled.
        size_t kernel_name_size = 0;
        error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 0, nullptr,
                                &kernel_name_size);
        test_error(error, "clGetKernelInfo (size) failed");

        std::vector<char> kernel_name(kernel_name_size);
        error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME,
                                kernel_name_size, kernel_name.data(), nullptr);
        test_error(error, "clGetKernelInfo (name) failed");

        if (name != std::string(kernel_name.data()))
        {
            log_error("Kernel name mismatch! expected=%s got=%s\n",
                      name.c_str(), kernel_name.data());
            return TEST_FAIL;
        }
    }

    // Online mode additionally exercises the separate compile + link path.
    if (gCompilationMode == kOnline)
    {
        clProgramWrapper programObj =
            clCreateProgramWithSource(context, 1, &ptr, nullptr, &error);
        test_error(error, "clCreateProgramWithSource failed (compile)");

        error = clCompileProgram(programObj, 0, nullptr, nullptr, 0, nullptr,
                                 nullptr, nullptr, nullptr);
        if (error != CL_SUCCESS)
        {
            log_error("ERROR: Unable to compile program with length of "
                      "kernel name "
                      "%u : %s! (%s from %s:%d)\n",
                      length, name.c_str(), IGetErrorString(error), __FILE__,
                      __LINE__);
            return TEST_FAIL;
        }

        clProgramWrapper linkedProgram =
            clLinkProgram(context, 0, nullptr, nullptr, 1, &programObj, nullptr,
                          nullptr, &error);
        if (error != CL_SUCCESS)
        {
            log_error("ERROR: Unable to link program with length of "
                      "kernel name "
                      "%u : %s! (%s from %s:%d)\n",
                      length, name.c_str(), IGetErrorString(error), __FILE__,
                      __LINE__);
            return TEST_FAIL;
        }

        // The long-named kernel must also be retrievable from the linked
        // program.
        clKernelWrapper kernel =
            clCreateKernel(linkedProgram, name.c_str(), &error);
        test_error(error, "clCreateKernel after link failed");
    }

    return TEST_PASS;
}
// Exercises kernel-name handling across a range of name lengths
// (32..2048, doubling each iteration).
REGISTER_TEST(kernel_name_size)
{
    for (cl_uint len = 32; len <= 2048; len *= 2)
    {
        cl_int status = test_kernel_name_len(context, device, len);
        if (status == TEST_FAIL)
        {
            // len is cl_uint, so print it with the unsigned conversion.
            log_error("ERROR: test_kernel_name_len failed with length %u\n",
                      len);
            return TEST_FAIL;
        }
    }
    return TEST_PASS;
}

View File

@@ -14,7 +14,11 @@
// limitations under the License.
//
#include "testBase.h"
#include "harness/kernelHelpers.h"
#include "harness/os_helpers.h"
#include "harness/testHarness.h"
#include <array>
const char *preprocessor_test_kernel[] = {
"__kernel void sample_test(__global int *dst)\n"
@@ -42,26 +46,22 @@ const char *include_test_kernel[] = {
"\n"
"}\n" };
const char *options_test_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" size_t tid = get_global_id(0);\n"
" dst[tid] = (int)src[tid];\n"
"}\n"
};
const char *options_test_kernel[] = { "__kernel void sample_test() {}\n" };
const char *optimization_options[] = {
"-cl-single-precision-constant",
"-cl-denorms-are-zero",
"-cl-opt-disable",
"-cl-mad-enable",
"-cl-no-signed-zeros",
"-cl-unsafe-math-optimizations",
"-cl-finite-math-only",
"-cl-fast-relaxed-math",
"-w",
"-Werror",
};
// Build options to exercise, each paired with the minimum OpenCL version
// that introduced the option; the test loop skips options newer than the
// device's reported version.
std::array optimization_options{
    std::pair{ "-cl-single-precision-constant", Version(1, 0) },
    std::pair{ "-cl-denorms-are-zero", Version(1, 0) },
    std::pair{ "-cl-opt-disable", Version(1, 0) },
    std::pair{ "-cl-mad-enable", Version(1, 0) },
    std::pair{ "-cl-no-signed-zeros", Version(1, 0) },
    std::pair{ "-cl-unsafe-math-optimizations", Version(1, 0) },
    std::pair{ "-cl-finite-math-only", Version(1, 0) },
    std::pair{ "-cl-fast-relaxed-math", Version(1, 0) },
    std::pair{ "-w", Version(1, 0) },
    std::pair{ "-Werror", Version(1, 0) },
    std::pair{ "-cl-uniform-work-group-size", Version(2, 0) },
    std::pair{ "-cl-no-subgroup-ifp", Version(2, 1) },
};
cl_int get_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_int *outValue )
{
@@ -93,31 +93,44 @@ REGISTER_TEST(options_build_optimizations)
int error;
cl_build_status status;
for(size_t i = 0; i < sizeof(optimization_options) / (sizeof(char*)); i++) {
Version version = get_device_cl_version(device);
clProgramWrapper program;
error = create_single_kernel_helper_create_program(context, &program, 1, options_test_kernel, optimization_options[i]);
if( program == NULL || error != CL_SUCCESS )
for (const auto &optimization_option : optimization_options)
{
log_error( "ERROR: Unable to create reference program!\n" );
if (version < optimization_option.second)
{
continue;
}
auto build_options = std::string("-cl-std=CL")
+ get_max_OpenCL_C_for_context(context).to_string() + " "
+ optimization_option.first;
const char *option = build_options.c_str();
clProgramWrapper program;
error = create_single_kernel_helper_create_program(
context, &program, 1, options_test_kernel, option);
if (program == NULL || error != CL_SUCCESS)
{
log_error("ERROR: Unable to create reference program!\n");
return -1;
}
/* Build with the macro defined */
log_info("Testing optimization option '%s'\n", optimization_options[i]);
error = clBuildProgram(program, 1, &device, optimization_options[i],
NULL, NULL);
test_error( error, "Test program did not properly build" );
log_info("Testing optimization option '%s'\n", option);
error = clBuildProgram(program, 1, &device, option, NULL, NULL);
test_error(error, "Test program did not properly build");
error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS,
sizeof(status), &status, NULL);
test_error( error, "Unable to get program build status" );
test_error(error, "Unable to get program build status");
if( (int)status != CL_BUILD_SUCCESS )
if ((int)status != CL_BUILD_SUCCESS)
{
log_info("Building with optimization option '%s' failed to compile!\n", optimization_options[i]);
print_error( error, "Failed to build with optimization defined")
return -1;
log_info(
"Building with optimization option '%s' failed to compile!\n",
option);
print_error(error,
"Failed to build with optimization defined") return -1;
}
}
return 0;
@@ -415,3 +428,53 @@ REGISTER_TEST(options_denorm_cache)
return 0;
}
// Verifies -cl-uniform-work-group-size semantics: a kernel built with this
// option must accept a local size that evenly divides the global size and
// must reject (CL_INVALID_WORK_GROUP_SIZE) one that does not.
REGISTER_TEST(options_uniform_work_group_size)
{
    // The option was introduced with OpenCL 2.0; skip on older devices.
    if (get_device_cl_version(device) < Version(2, 0))
        return TEST_SKIPPED_ITSELF;

    const std::string opt_string = std::string("-cl-std=CL")
        + get_max_OpenCL_C_for_context(context).to_string()
        + " -cl-uniform-work-group-size";
    const char *opts = opt_string.c_str();

    clProgramWrapper program;
    int err = create_single_kernel_helper_create_program(
        context, &program, 1, options_test_kernel, opts);
    if (err != CL_SUCCESS || program == NULL)
    {
        log_error("Error: Unable to create reference program!\n");
        return TEST_FAIL;
    }

    err = clBuildProgram(program, 1, &device, opts, NULL, NULL);
    test_error(err, "Test program did not properly build");

    clKernelWrapper kernel = clCreateKernel(program, "sample_test", &err);
    test_error(err, "Unable to create kernel");

    // Uniform case: local size 2 divides global size 4 — must be accepted.
    size_t gws = 4;
    size_t lws_uniform = 2;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &gws, &lws_uniform, 0,
                                 NULL, NULL);
    test_error(err,
               "Unable to enqueue NDRange kernel with uniform work group size");
    err = clFinish(queue);
    test_error(err, "Unable to finish");

    // Non-uniform case: local size 3 does not divide 4 — must be rejected.
    size_t lws_non_uniform = 3;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &gws,
                                 &lws_non_uniform, 0, NULL, NULL);
    if (err != CL_INVALID_WORK_GROUP_SIZE)
    {
        log_error(
            "Error: expected error 'CL_INVALID_WORK_GROUP_SIZE' (got '%s') "
            "trying to enqueue kernel compiled with '%s' with non-uniform work "
            "group size\n",
            IGetErrorString(err), opts);
        return TEST_FAIL;
    }

    return TEST_PASS;
}

View File

@@ -3983,6 +3983,9 @@ REGISTER_TEST(multiple_build_program)
error = clEnqueueNDRangeKernel(queue, kernel0, 1, NULL, &num_threads,
NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
}
{
@@ -4001,10 +4004,10 @@ REGISTER_TEST(multiple_build_program)
error = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &num_threads,
NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
}
error = clFinish(queue);
test_error(error, "clFinish failed");
}
std::vector<cl_int> test_values(num_threads, 0);
error = clEnqueueReadBuffer(queue, out_stream_0, true, 0,

View File

@@ -56,6 +56,7 @@ const char *known_extensions[] = {
"cl_khr_integer_dot_product",
"cl_khr_subgroup_rotate",
"cl_khr_kernel_clock",
"cl_khr_icd_unloadable",
// API-only extensions after this point. If you add above here, modify
// first_API_extension below.
"cl_khr_icd",

View File

@@ -191,7 +191,7 @@ double sse_mul_sd(double x, double y)
}
#endif
#ifdef __PPC__
#if defined(__PPC__) || defined(__riscv)
float ppc_mul(float a, float b)
{
float p;
@@ -630,9 +630,11 @@ test_status InitCL( cl_device_id device )
// turn that off
f3[i] = sse_mul(q, q2);
f4[i] = sse_mul(-q, q2);
#elif defined(__PPC__)
// None of the current generation PPC processors support HW
// FTZ, emulate it in sw.
#elif (defined(__PPC__) || defined(__riscv))
// RISC-V CPUs with default 'f' fp32 extension do not support
// enabling/disabling FTZ mode, subnormals are always handled
// without FTZ. None of the current generation PPC processors
// support HW FTZ, emulate it in sw.
f3[i] = ppc_mul(q, q2);
f4[i] = ppc_mul(-q, q2);
#else
@@ -721,9 +723,10 @@ test_status InitCL( cl_device_id device )
skipTest[j][i] = (bufSkip[i] ||
(gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)))));
#if defined(__PPC__)
// Since the current Power processors don't emulate flush to zero in HW,
// it must be emulated in SW instead.
#if defined(__PPC__) || defined(__riscv)
// Since the current Power processors don't emulate flush to
// zero in HW, it must be emulated in SW instead. (same for
// RISC-V CPUs with 'f' extension)
if (gForceFTZ)
{
if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f))
@@ -760,7 +763,6 @@ test_status InitCL( cl_device_id device )
}
}
double *f = (double*) buf1;
double *f2 = (double*) buf2;
double *f3 = (double*) buf3_double;

View File

@@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
uint64_t GetTime(void);
void WriteInputBufferComplete(void *);
void *FlushToZero(void);
void UnFlushToZero(void *);
}
struct CalcRefValsBase

View File

@@ -340,7 +340,7 @@ static const char* enqueue_block_capture_event_profiling_info_before_execution[]
set_user_event_status(user_evt, CL_COMPLETE);
void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };
void (^checkBlock) (void) = ^{ check_res(tid, value, res); };
enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

View File

@@ -129,8 +129,8 @@ static const char *helper_ndrange_2d_glo[] = {
"}" NL,
"" NL,
"kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -156,8 +156,8 @@ static const char *helper_ndrange_2d_loc[] = {
"}" NL,
"" NL,
"kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -193,8 +193,8 @@ static const char *helper_ndrange_2d_ofs[] = {
"}" NL,
"" NL,
"kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -233,8 +233,8 @@ static const char *helper_ndrange_3d_glo[] = {
"}" NL,
"" NL,
"kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -266,8 +266,8 @@ static const char *helper_ndrange_3d_loc[] = {
"}" NL,
"" NL,
"kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -306,8 +306,8 @@ static const char *helper_ndrange_3d_ofs[] = {
"}" NL,
"" NL,
"kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,

View File

@@ -14,7 +14,10 @@
//
#include "harness/typeWrappers.h"
#include "harness/extensionHelpers.h"
#include <cinttypes>
#include <vector>
#include <string>
#define BUF_SIZE 1024
#define BUF_SIZE_STR "1024"
@@ -310,8 +313,8 @@ private:
// A basic buffer used to pass the other buffer's address.
error = clEnqueueWriteBuffer(queue, buffer_in_long,
CL_TRUE, // block
0, sizeof(cl_long), &DeviceAddrFromAPI,
0, NULL, NULL);
0, sizeof(DeviceAddrFromAPI),
&DeviceAddrFromAPI, 0, NULL, NULL);
test_error_fail(error,
"clEnqueueWriteBuffer of dev_addr_buffer failed\n");
@@ -322,9 +325,9 @@ private:
&buffer_out_int);
test_error_fail(error, "clSetKernelArg 1 failed\n");
error = clSetKernelExecInfo(ind_access_kernel,
CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT,
sizeof(void *), &DeviceAddrFromAPI);
error = clSetKernelExecInfo(
ind_access_kernel, CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT,
sizeof(DeviceAddrFromAPI), &DeviceAddrFromAPI);
test_error_fail(error,
"Setting indirect access for "
"device ptrs failed!\n");
@@ -421,6 +424,8 @@ private:
REGISTER_TEST(private_address)
{
REQUIRE_EXTENSION("cl_ext_buffer_device_address");
BufferDeviceAddressTest test_fixture = BufferDeviceAddressTest(
device, context, queue, CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT);
@@ -435,3 +440,180 @@ REGISTER_TEST(private_address)
return TEST_PASS;
}
REGISTER_TEST(private_address_multi_device)
{
    // Verifies cl_ext_buffer_device_address in a multi-device context: a
    // buffer created with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT reports one
    // device address per device, and writing through each per-device
    // address (at a byte offset) via clSetKernelArgDevicePointerEXT lands
    // at the expected location in the buffer.
    REQUIRE_EXTENSION("cl_ext_buffer_device_address");

    cl_platform_id platform = 0;
    cl_int error = CL_SUCCESS;
    cl_uint numDevices = 0;
    error = clGetPlatformIDs(1, &platform, NULL);
    test_error_ret(error, "Unable to get platform\n", TEST_FAIL);

    /* Get some devices */
    error =
        clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
    test_error_ret(error, "Unable to get multiple devices\n", TEST_FAIL);

    if (numDevices < 2)
    {
        log_info(
            "WARNING: multi device test unable to get multiple devices via "
            "CL_DEVICE_TYPE_ALL (got %u devices). Skipping test...\n",
            numDevices);
        return TEST_SKIPPED_ITSELF;
    }

    std::vector<cl_device_id> devices(numDevices);
    error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, numDevices,
                           devices.data(), &numDevices);
    test_error_ret(error, "Unable to get multiple devices\n", TEST_FAIL);

    GET_PFN(devices[0], clSetKernelArgDevicePointerEXT);

    cl_context_properties properties[] = { CL_CONTEXT_PLATFORM,
                                           (cl_context_properties)platform,
                                           0 };
    clContextWrapper ctx = clCreateContext(
        properties, numDevices, devices.data(), nullptr, nullptr, &error);
    test_error_ret(error, "Unable to create context\n", TEST_FAIL);

    /* Create buffer */
    cl_mem_properties props[] = { CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT, CL_TRUE,
                                  0 };
    clMemWrapper buffer = clCreateBufferWithProperties(
        ctx, props, CL_MEM_READ_WRITE, 16, nullptr, &error);
    // Check creation explicitly; previously a creation failure would only
    // surface indirectly through the clGetMemObjectInfo call below.
    test_error_ret(error, "Unable to create buffer\n", TEST_FAIL);

    // One device address per device in the context.
    std::vector<cl_mem_device_address_ext> addresses(numDevices);
    error =
        clGetMemObjectInfo(buffer, CL_MEM_DEVICE_ADDRESS_EXT,
                           sizeof(cl_mem_device_address_ext) * addresses.size(),
                           addresses.data(), nullptr);
    test_error_ret(error, "clGetMemObjectInfo failed\n", TEST_FAIL);

    std::vector<clCommandQueueWrapper> queues(numDevices);
    for (cl_uint i = 0; i < numDevices; ++i)
    {
        queues[i] = clCreateCommandQueue(ctx, devices[i], 0, &error);
        test_error_ret(error, "Unable to create command queue\n", TEST_FAIL);
    }

    static std::string source = R"(
        void kernel test_device_address(
            global ulong* ptr,
            ulong value)
        {
            *ptr = value;
        })";

    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *source_ptr = source.data();
    error = create_single_kernel_helper(ctx, &program, &kernel, 1, &source_ptr,
                                        "test_device_address");
    test_error(error, "Unable to create test kernel");

    for (cl_uint i = 0; i < numDevices; ++i)
    {
        cl_command_queue queue = queues[i];

        // Setting a NULL device pointer must be accepted.
        error = clSetKernelArgDevicePointerEXT(kernel, 0, 0);
        test_error_fail(error,
                        "clSetKernelArgDevicePointerEXT failed with NULL "
                        "pointer argument\n");

        // Target byte offset 8 within the buffer via this device's address.
        error = clSetKernelArgDevicePointerEXT(kernel, 0, addresses[i] + 8);
        test_error_ret(error, "Unable to set kernel arg\n", TEST_FAIL);

        // Per-device pattern so each iteration's write is distinguishable.
        const cl_ulong pattern = 0xAABBCCDDEEFF0011 + i;
        error = clSetKernelArg(kernel, 1, sizeof(pattern), &pattern);
        test_error_ret(error, "Unable to set kernel arg\n", TEST_FAIL);

        size_t gwo = 0;
        size_t gws = 1;
        size_t lws = 1;
        error = clEnqueueNDRangeKernel(queue, kernel, 1, &gwo, &gws, &lws, 0,
                                       nullptr, nullptr);
        test_error_ret(error, "Unable to enqueue kernel\n", TEST_FAIL);

        error = clFinish(queue);
        test_error_ret(error, "clFinish failed\n", TEST_FAIL);

        // results[1] corresponds to byte offset 8 (the second cl_ulong).
        std::vector<cl_ulong> results(2, 0);
        error = clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0,
                                    results.size() * sizeof(cl_ulong),
                                    results.data(), 0, nullptr, nullptr);
        test_error_ret(error, "clEnqueueReadBuffer failed\n", TEST_FAIL);

        if (results[1] != pattern)
            test_fail("Test value doesn't match expected value\n");
    }

    return TEST_PASS;
}
REGISTER_TEST(negative_private_address)
{
    // Negative tests for cl_ext_buffer_device_address:
    // - querying CL_MEM_DEVICE_ADDRESS_EXT on a buffer created without the
    //   CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT property must fail with
    //   CL_INVALID_OPERATION;
    // - clSetKernelArgDevicePointerEXT must reject an invalid kernel and
    //   argument indices that are not global-memory pointers.
    REQUIRE_EXTENSION("cl_ext_buffer_device_address");

    cl_int error = CL_SUCCESS;
    GET_PFN(device, clSetKernelArgDevicePointerEXT);

    /* Create buffer */
    clMemWrapper buffer = clCreateBufferWithProperties(
        context, nullptr, CL_MEM_READ_WRITE, 16, nullptr, &error);
    // Plain buffer creation must succeed; check it so a failure here is
    // reported directly rather than masked by the negative query below.
    test_error_ret(error, "Unable to create buffer\n", TEST_FAIL);

    cl_mem_device_address_ext address;
    error = clGetMemObjectInfo(buffer, CL_MEM_DEVICE_ADDRESS_EXT,
                               sizeof(cl_mem_device_address_ext), &address,
                               nullptr);
    test_failure_error_ret(
        error, CL_INVALID_OPERATION,
        "clGetMemObjectInfo should return CL_INVALID_OPERATION when: "
        "\"the buffer was not created with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT\"",
        TEST_FAIL);

    static std::string source = R"(
        void kernel test_device_address(
            global ulong* ptr,
            local ulong* ptr2,
            ulong value)
        {
            *ptr = value;
        })";

    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *source_ptr = source.data();
    error = create_single_kernel_helper(context, &program, &kernel, 1,
                                        &source_ptr, "test_device_address");
    test_error(error, "Unable to create test kernel");

    // NULL kernel handle.
    error = clSetKernelArgDevicePointerEXT(nullptr, 0, 0);
    test_failure_error_ret(
        error, CL_INVALID_KERNEL,
        "clSetKernelArgDevicePointerEXT should return CL_INVALID_KERNEL when: "
        "\"kernel is not a valid kernel object\"",
        TEST_FAIL);

    // Arg 1 is a local pointer, not a global-memory pointer.
    error = clSetKernelArgDevicePointerEXT(kernel, 1, 0x15465);
    test_failure_error_ret(
        error, CL_INVALID_ARG_INDEX,
        "clSetKernelArgDevicePointerEXT should return "
        "CL_INVALID_ARG_INDEX when: "
        "\"the expected kernel argument is not a pointer to global memory\"",
        TEST_FAIL);

    // Arg 2 is a scalar, not a pointer at all.
    error = clSetKernelArgDevicePointerEXT(kernel, 2, 0x15465);
    test_failure_error_ret(error, CL_INVALID_ARG_INDEX,
                           "clSetKernelArgDevicePointerEXT should return "
                           "CL_INVALID_ARG_INDEX when: "
                           "\"the expected kernel argument is not a pointer\"",
                           TEST_FAIL);

    // Arg 3 is out of range for this three-argument kernel.
    error = clSetKernelArgDevicePointerEXT(kernel, 3, 0x15465);
    test_failure_error_ret(error, CL_INVALID_ARG_INDEX,
                           "clSetKernelArgDevicePointerEXT should return "
                           "CL_INVALID_ARG_INDEX when: "
                           "\"arg_index is not a valid argument index\"",
                           TEST_FAIL);

    return TEST_PASS;
}

View File

@@ -42,8 +42,8 @@ int test_cxx_for_opencl(cl_device_id device, cl_context context,
execute(*p, x);
})";
error = create_single_kernel_helper_with_build_options(
context, &program, &kernel1, 1, &kernel_sstr, "k1", "-cl-std=CLC++");
error = create_single_kernel_helper(context, &program, &kernel1, 1,
&kernel_sstr, "k1", "-cl-std=CLC++");
test_error(error, "Failed to create k1 kernel");
kernel2 = clCreateKernel(program, "k2", &error);

View File

@@ -18,6 +18,7 @@ set(${MODULE_NAME}_SOURCES
command_buffer_test_event_info.cpp
command_buffer_finalize.cpp
command_buffer_pipelined_enqueue.cpp
command_buffer_kernel_attributes.cpp
negative_command_buffer_finalize.cpp
negative_command_buffer_svm_mem.cpp
negative_command_buffer_copy_image.cpp

View File

@@ -16,6 +16,7 @@ set(${MODULE_NAME}_SOURCES
mutable_command_work_groups.cpp
mutable_command_work_dim.cpp
mutable_command_update_state.cpp
mutable_command_defer_arguments.cpp
../basic_command_buffer.cpp
)

View File

@@ -76,12 +76,10 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
bool Skip() override
{
bool extension_avaliable =
is_extension_available(device,
"cl_khr_command_buffer_mutable_dispatch")
== true;
bool extension_available = is_extension_available(
device, "cl_khr_command_buffer_mutable_dispatch");
if (extension_avaliable)
if (extension_available)
{
Version device_version = get_device_cl_version(device);
if ((device_version >= Version(3, 0))
@@ -91,12 +89,12 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
cl_version extension_version = get_extension_version(
device, "cl_khr_command_buffer_mutable_dispatch");
if (extension_version != CL_MAKE_VERSION(0, 9, 4))
if (extension_version != CL_MAKE_VERSION(0, 9, 5))
{
log_info("cl_khr_command_buffer_mutable_dispatch version "
"0.9.4 is "
"0.9.5 is "
"required to run the test, skipping.\n ");
extension_avaliable = false;
extension_available = false;
}
}
}
@@ -109,7 +107,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities != 0;
return !mutable_support || !extension_avaliable
return !mutable_support || !extension_available
|| BasicCommandBufferTest::Skip();
}

View File

@@ -0,0 +1,219 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testHarness.h"
#include "mutable_command_basic.h"
namespace {
////////////////////////////////////////////////////////////////////////////////
// Mutable dispatch test which handles the case where all the arguments of a
// kernel aren't set when a kernel is initially added to a mutable
// command-buffer, but deferred until an update is made to the command to set
// them before command-buffer enqueue.
struct MutableDispatchDeferArguments : public BasicMutableCommandBufferTest
{
    MutableDispatchDeferArguments(cl_device_id device, cl_context context,
                                  cl_command_queue queue)
        : BasicMutableCommandBufferTest(device, context, queue)
    {}

    // Skip unless the device advertises the mutable-arguments capability
    // (CL_MUTABLE_DISPATCH_ARGUMENTS_KHR), in addition to the base checks.
    bool Skip() override
    {
        if (BasicMutableCommandBufferTest::Skip()) return true;
        cl_mutable_dispatch_fields_khr mutable_capabilities;
        bool mutable_support =
            !clGetDeviceInfo(
                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
            && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
        // require mutable arguments capability
        return !mutable_support;
    }

    // Builds the simple src->dst copy kernel whose 'dst' argument (index 1)
    // is deliberately left unset until the mutable update in Run().
    cl_int SetUpKernel() override
    {
        // Create kernel
        const char *defer_args_kernel =
            R"(
            __kernel void defer_args_test(__constant int *src, __global int *dst)
            {
                size_t tid = get_global_id(0);
                dst[tid] = src[tid];
            })";
        cl_int error =
            create_single_kernel_helper(context, &program, &kernel, 1,
                                        &defer_args_kernel, "defer_args_test");
        test_error(error, "Creating kernel failed");
        return CL_SUCCESS;
    }

    // Creates the source/destination buffers and sets ONLY argument 0,
    // leaving argument 1 unset on purpose (that is the point of the test).
    cl_int SetUpKernelArgs() override
    {
        // Create and initialize buffers
        MTdataHolder d(gRandomSeed);
        src_data.resize(num_elements);
        for (size_t i = 0; i < num_elements; i++)
            src_data[i] = (cl_int)genrand_int32(d);
        cl_int error = CL_SUCCESS;
        in_mem = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                                num_elements * sizeof(cl_int), src_data.data(),
                                &error);
        test_error(error, "Creating src buffer");
        out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                 num_elements * sizeof(cl_int), NULL, &error);
        test_error(error, "Creating initial dst buffer failed");
        // Only set a single kernel argument, leaving argument at index 1 unset
        error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem);
        test_error(error, "Unable to set src kernel arguments");
        return CL_SUCCESS;
    }

    // Returns true iff the command-buffer's CL_COMMAND_BUFFER_STATE_KHR
    // query reports exactly 'expected'; logs any mismatch or query failure.
    bool verify_state(cl_command_buffer_state_khr expected)
    {
        // Initialize to an impossible state so a failed query cannot
        // accidentally match 'expected'.
        cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0);
        cl_int error = clGetCommandBufferInfoKHR(
            command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state), &state,
            nullptr);
        if (error != CL_SUCCESS)
        {
            log_error("clGetCommandBufferInfoKHR failed: %d", error);
            return false;
        }
        if (state != expected)
        {
            log_error("Unexpected result of CL_COMMAND_BUFFER_STATE_KHR query. "
                      "Expected %u, but was %u\n",
                      expected, state);
            return false;
        }
        return true;
    }

    // Reads 'buffer' back and checks it matches the random source data.
    bool verify_result(const cl_mem &buffer)
    {
        std::vector<cl_int> data(num_elements);
        cl_int error =
            clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(),
                                data.data(), 0, nullptr, nullptr);
        test_error(error, "clEnqueueReadBuffer failed");
        for (size_t i = 0; i < num_elements; i++)
        {
            if (data[i] != src_data[i])
            {
                log_error("Modified verification failed at index %zu: Got %d, "
                          "wanted %d\n",
                          i, data[i], src_data[i]);
                return false;
            }
        }
        return true;
    }

    // Drives the state machine: record the command with a missing argument,
    // finalize (FINALIZED state), confirm enqueue is rejected, supply the
    // argument via a mutable update (EXECUTABLE state), then enqueue and
    // verify the copy result.
    cl_int Run() override
    {
        // Create command while the kernel still has the second argument unset.
        // Passing 'CL_MUTABLE_DISPATCH_ARGUMENTS_KHR' as a property means this
        // shouldn't be an error.
        cl_command_properties_khr props[] = {
            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
        };
        cl_int error = clCommandNDRangeKernelKHR(
            command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
            nullptr, 0, nullptr, nullptr, &command);
        test_error(error, "clCommandNDRangeKernelKHR failed");

        // Finalizing the command buffer shouldn't be an error, but result in
        // the command-buffer entering the CL_COMMAND_BUFFER_STATE_FINALIZED
        // state.
        error = clFinalizeCommandBufferKHR(command_buffer);
        test_error(error, "clFinalizeCommandBufferKHR failed");
        if (!verify_state(CL_COMMAND_BUFFER_STATE_FINALIZED_KHR))
        {
            return TEST_FAIL;
        }

        // Check that trying to enqueue the command-buffer in this state is an
        // error, as it needs to be in the executable state.
        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
                                          nullptr, nullptr);
        test_failure_error_ret(error, CL_INVALID_OPERATION,
                               "clEnqueueCommandBufferKHR should return "
                               "CL_INVALID_OPERATION",
                               TEST_FAIL);

        // Update the kernel command to set the missing argument.
        cl_mutable_dispatch_arg_khr arg{ 1, sizeof(out_mem), &out_mem };
        cl_mutable_dispatch_config_khr dispatch_config{
            command,
            1 /* num_args */,
            0 /* num_svm_arg */,
            0 /* num_exec_infos */,
            0 /* work_dim - 0 means no change to dimensions */,
            &arg /* arg_list */,
            nullptr /* arg_svm_list - nullptr means no change*/,
            nullptr /* exec_info_list */,
            nullptr /* global_work_offset */,
            nullptr /* global_work_size */,
            nullptr /* local_work_size */
        };
        cl_uint num_configs = 1;
        cl_command_buffer_update_type_khr config_types[1] = {
            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
        };
        const void *configs[1] = { &dispatch_config };
        error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
                                           config_types, configs);
        test_error(error, "clUpdateMutableCommandsKHR failed");

        // Now that all the arguments have been set, verify the
        // command-buffer has entered the executable state.
        if (!verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR))
        {
            return TEST_FAIL;
        }

        // Execute command-buffer and verify results are expected
        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
                                          nullptr, nullptr);
        test_error(error, "clEnqueueCommandBufferKHR failed");
        if (!verify_result(out_mem)) return TEST_FAIL;

        return TEST_PASS;
    }

    // Handle of the recorded ND-range command being mutated.
    cl_mutable_command_khr command;
    // Host-side copy of the random source data used for verification.
    std::vector<cl_int> src_data;
};
} // anonymous namespace
// Entry point: runs MutableDispatchDeferArguments through the standard
// command-buffer test harness.
REGISTER_TEST(mutable_dispatch_defer_arguments)
{
    return MakeAndRunTest<MutableDispatchDeferArguments>(device, context, queue,
                                                         num_elements);
}

View File

@@ -20,6 +20,7 @@
#include <CL/cl.h>
#include <CL/cl_ext.h>
#include <algorithm>
#include <vector>
namespace {
@@ -126,7 +127,20 @@ struct MutableCommandFullDispatch : InfoMutableCommandBufferTest
&workgroupinfo_size, NULL);
test_error(error, "clGetKernelWorkGroupInfo failed");
group_size = std::min(num_elements, workgroupinfo_size);
cl_uint max_work_dimension = 0;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
sizeof(max_work_dimension), &max_work_dimension,
NULL);
test_error(error, "clGetDeviceInfo failed");
std::vector<size_t> max_work_item_sizes(max_work_dimension, 0);
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(size_t) * max_work_item_sizes.size(),
max_work_item_sizes.data(), NULL);
test_error(error, "clGetDeviceInfo failed");
group_size = std::min(
{ num_elements, workgroupinfo_size, max_work_item_sizes[0] });
const size_t size_to_allocate_src = group_size * sizeof(cl_int);
// create and initialize source buffer

View File

@@ -0,0 +1,235 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "basic_command_buffer.h"
namespace {
////////////////////////////////////////////////////////////////////////////////
// Tests for cl_khr_command_buffer while enqueueing a kernel with a
// reqd_work_group_size with a NULL local_work_size.
struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest
{
    // Kernel body shared by all reqd_work_group_size variants; work-item
    // (0,0,0) records the local size actually used for the dispatch.
    inline static const std::string body_str = R"(
        __kernel void wg_size(__global int* dst)
        {
            if (get_global_id(0) == 0 &&
                get_global_id(1) == 0 &&
                get_global_id(2) == 0) {
                dst[0] = get_local_size(0);
                dst[1] = get_local_size(1);
                dst[2] = get_local_size(2);
            }
        }
    )";

    KernelAttributesReqGroupSizeTest(cl_device_id device, cl_context context,
                                     cl_command_queue queue)
        : BasicCommandBufferTest(device, context, queue), dst(nullptr),
          clGetKernelSuggestedLocalWorkSizeKHR(nullptr),
          device_max_work_group_size(0)
    {}

    // Creates the result buffer and queries the device limits needed to
    // decide which attribute combinations can legally run.
    cl_int SetUp(int elements) override
    {
        cl_int error = BasicCommandBufferTest::SetUp(elements);
        test_error(error, "BasicCommandBufferTest::SetUp failed");

        // Optionally resolve clGetKernelSuggestedLocalWorkSizeKHR so the
        // suggested local size can be cross-checked against the attribute.
        if (is_extension_available(device, "cl_khr_suggested_local_work_size"))
        {
            cl_platform_id platform = nullptr;
            error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
                                    sizeof(platform), &platform, NULL);
            test_error(error, "clGetDeviceInfo for platform failed");
            clGetKernelSuggestedLocalWorkSizeKHR =
                (clGetKernelSuggestedLocalWorkSizeKHR_fn)
                    clGetExtensionFunctionAddressForPlatform(
                        platform, "clGetKernelSuggestedLocalWorkSizeKHR");
            test_assert_error(clGetKernelSuggestedLocalWorkSizeKHR != nullptr,
                              "Couldn't get function pointer for "
                              "clGetKernelSuggestedLocalWorkSizeKHR");
        }

        dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
                             nullptr, &error);
        test_error(error, "clCreateBuffer failed");

        cl_uint device_max_dim = 0;
        error =
            clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                            sizeof(device_max_dim), &device_max_dim, nullptr);
        test_error(
            error,
            "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
        test_assert_error(
            device_max_dim >= 3,
            "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS must be at least 3!");

        device_max_work_item_sizes.resize(device_max_dim);
        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                sizeof(size_t) * device_max_dim,
                                device_max_work_item_sizes.data(), nullptr);
        // Check this query's result; previously 'error' was silently
        // overwritten by the next clGetDeviceInfo call without being checked.
        test_error(error,
                   "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed");

        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                                sizeof(device_max_work_group_size),
                                &device_max_work_group_size, nullptr);
        test_error(error,
                   "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed");

        return CL_SUCCESS;
    }

    // For each reqd_work_group_size attribute and each legal work_dim,
    // enqueues the kernel with a NULL local_work_size and verifies the
    // implementation picked the attribute's local size (and, when available,
    // that clGetKernelSuggestedLocalWorkSizeKHR agrees).
    cl_int Run() override
    {
        cl_int error = CL_SUCCESS;

        struct KernelAttribInfo
        {
            cl_int wgs[3]; // the reqd_work_group_size triple
            cl_uint min_dim; // smallest work_dim that exercises all sizes > 1
        };
        std::vector<KernelAttribInfo> attribs = { { { 2, 1, 1 }, 1 },
                                                  { { 2, 3, 1 }, 2 },
                                                  { { 2, 3, 4 }, 3 } };
        for (auto& attrib : attribs)
        {
            // Build the kernel source with this attribute prepended.
            const std::string attrib_str =
                "__attribute__((reqd_work_group_size("
                + std::to_string(attrib.wgs[0]) + ","
                + std::to_string(attrib.wgs[1]) + ","
                + std::to_string(attrib.wgs[2]) + ")))";
            const std::string source_str = attrib_str + body_str;
            const char* source = source_str.c_str();

            clProgramWrapper program;
            clKernelWrapper kernel;
            error = create_single_kernel_helper(context, &program, &kernel, 1,
                                                &source, "wg_size");
            test_error(error, "Unable to create test kernel");

            error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
            test_error(error, "clSetKernelArg failed");

            for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++)
            {
                // Skip combinations the device cannot legally execute.
                const size_t test_work_group_size =
                    attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2];
                if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0]
                    || (size_t)attrib.wgs[1] > device_max_work_item_sizes[1]
                    || (size_t)attrib.wgs[2] > device_max_work_item_sizes[2]
                    || test_work_group_size > device_max_work_group_size)
                {
                    log_info(
                        "Skipping test for work_dim = %u: required work group "
                        "size (%i, %i, %i) (total %zu) exceeds device max "
                        "work group size (%zu, %zu, %zu) (total %zu)\n",
                        work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
                        test_work_group_size, device_max_work_item_sizes[0],
                        device_max_work_item_sizes[1],
                        device_max_work_item_sizes[2],
                        device_max_work_group_size);
                    continue;
                }

                // Zero the result buffer before the dispatch.
                const cl_int zero = 0;
                error = clCommandFillBufferKHR(
                    command_buffer, nullptr, nullptr, dst, &zero, sizeof(zero),
                    0, sizeof(attrib.wgs), 0, nullptr, nullptr, nullptr);
                test_error(error, "clCommandFillBufferKHR failed");

                // Global size is a multiple of every attribute size so a
                // NULL local size must resolve to the attribute's values.
                const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
                error = clCommandNDRangeKernelKHR(
                    command_buffer, nullptr, nullptr, kernel, work_dim, nullptr,
                    global_work_size, nullptr, 0, nullptr, nullptr, nullptr);
                test_error(error, "clCommandNDRangeKernelKHR failed");

                error = clFinalizeCommandBufferKHR(command_buffer);
                test_error(error, "clFinalizeCommandBufferKHR failed");

                error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
                                                  nullptr, nullptr);
                test_error(error, "clEnqueueCommandBufferKHR failed");

                cl_int results[3] = { -1, -1, -1 };
                error =
                    clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(results),
                                        results, 0, nullptr, nullptr);
                test_error(error, "clEnqueueReadBuffer failed");

                // Verify the result
                if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1]
                    || results[2] != attrib.wgs[2])
                {
                    log_error(
                        "Executed local size mismatch with work_dim = %u: "
                        "Expected (%d,%d,%d) got (%d,%d,%d)\n",
                        work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
                        results[0], results[1], results[2]);
                    return TEST_FAIL;
                }

                if (clGetKernelSuggestedLocalWorkSizeKHR != nullptr)
                {
                    size_t suggested[3] = { 1, 1, 1 };
                    error = clGetKernelSuggestedLocalWorkSizeKHR(
                        queue, kernel, work_dim, nullptr, global_work_size,
                        suggested);
                    test_error(error,
                               "clGetKernelSuggestedLocalWorkSizeKHR failed");
                    if (suggested[0] != (size_t)attrib.wgs[0]
                        || suggested[1] != (size_t)attrib.wgs[1]
                        || suggested[2] != (size_t)attrib.wgs[2])
                    {
                        log_error(
                            "Suggested local size mismatch with work_dim = "
                            "%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n",
                            work_dim, attrib.wgs[0], attrib.wgs[1],
                            attrib.wgs[2], suggested[0], suggested[1],
                            suggested[2]);
                        return TEST_FAIL;
                    }
                }

                // create new command buffer
                command_buffer =
                    clCreateCommandBufferKHR(1, &queue, nullptr, &error);
                test_error(error, "clCreateCommandBufferKHR failed");
            }
        }
        return CL_SUCCESS;
    }

    clMemWrapper dst; // 3-element result buffer holding the local size used
    clGetKernelSuggestedLocalWorkSizeKHR_fn
        clGetKernelSuggestedLocalWorkSizeKHR; // nullptr when not supported
    size_t device_max_work_group_size;
    std::vector<size_t> device_max_work_item_sizes;
};
} // anonymous namespace
// Entry point: runs KernelAttributesReqGroupSizeTest through the standard
// command-buffer test harness.
REGISTER_TEST(command_null_required_work_group_size)
{
    return MakeAndRunTest<KernelAttributesReqGroupSizeTest>(
        device, context, queue, num_elements);
}

View File

@@ -23,9 +23,32 @@
#include <android/hardware_buffer.h>
#include "debug_ahb.h"
static bool isAHBUsageReadable(const AHardwareBuffer_UsageFlags usage)
static bool isAHBUsageReadableHost(AHardwareBuffer_UsageFlags usage)
{
return (AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE & usage) != 0;
return (AHARDWAREBUFFER_USAGE_CPU_READ_MASK & usage) != 0;
}
// Returns true when the AHB usage flags permit CPU (host) writes,
// i.e. any bit of the CPU write mask is set.
static bool isAHBUsageWritableHost(AHardwareBuffer_UsageFlags usage)
{
    const bool host_writable =
        (usage & AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK) != 0;
    return host_writable;
}
// Returns true when the AHB usage flags permit device-side reads: GPU
// image sampling, GPU data-buffer access, or sensor direct data.
static bool isAHBUsageReadableDevice(const AHardwareBuffer_UsageFlags usage)
{
    const auto device_read_mask = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE
        | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER
        | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA;
    return (usage & device_read_mask) != 0;
}
// Queries and returns CL_DEVICE_MAX_MEM_ALLOC_SIZE for the given device.
// On query failure, test_error logs and returns the error code.
static cl_ulong getMaxAllocSize(cl_device_id device)
{
    cl_ulong max_alloc_size = 0;
    const cl_int status =
        clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                        sizeof(max_alloc_size), &max_alloc_size, nullptr);
    test_error(status, "clGetDeviceInfo failed");
    return max_alloc_size;
}
struct ahb_format_table
@@ -50,6 +73,32 @@ ahb_image_size_table test_sizes[] = {
{ 64, 64 }, { 128, 128 }, { 256, 256 }, { 512, 512 }
};
uint32_t test_buffer_sizes[] = { 2, 8, 32, 128, 512, 2048, 16384, 65536 };
// AHB usage-flag combinations exercised by the buffer tests. Each entry
// mixes host (CPU) read/write hints with device-side access flags; the
// last two entries are host-only combinations.
ahb_usage_table test_buffer_usages[] = {
    // Host read often + rare host write, device data-buffer access.
    { static_cast<AHardwareBuffer_UsageFlags>(
        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
        | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY
        | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) },
    // Host read often + rare host write, sensor direct data.
    { static_cast<AHardwareBuffer_UsageFlags>(
        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
        | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY
        | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) },
    // Host read + write often, device data-buffer access.
    { static_cast<AHardwareBuffer_UsageFlags>(
        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
        | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
        | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) },
    // Host read + write often, sensor direct data.
    { static_cast<AHardwareBuffer_UsageFlags>(
        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
        | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
        | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) },
    // Host-only: read often + rare write.
    { static_cast<AHardwareBuffer_UsageFlags>(
        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
        | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY) },
    // Host-only: write often.
    { static_cast<AHardwareBuffer_UsageFlags>(
        AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN) }
};
ahb_usage_table test_usages[] = {
{ static_cast<AHardwareBuffer_UsageFlags>(
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
@@ -96,6 +145,23 @@ static const char *diff_images_kernel_source = {
})"
};
// OpenCL C source used by the lifetime tests: 'increment_buffer' bumps
// each byte of a buffer by one; 'set_image_color' fills an image with a
// given color.
static const char *lifetime_kernel_source = {
    R"(
    __kernel void increment_buffer(global uchar* buffer)
    {
        int tid = get_global_id(0);
        buffer[tid] ++;
    }

    __kernel void set_image_color(write_only image2d_t ahb_image, float4 set_color)
    {
        int tidX = get_global_id(0);
        int tidY = get_global_id(1);
        write_imagef(ahb_image, (int2)( tidX, tidY ), set_color);
    })"
};
// Checks that the inferred image format is correct
REGISTER_TEST(images)
{
@@ -217,8 +283,9 @@ REGISTER_TEST(images_read)
aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages)
{
// Filter out usage flags that are not readable on device
if (!isAHBUsageReadable(usage.usageFlags))
if (!(isAHBUsageReadableHost(usage.usageFlags)
&& isAHBUsageWritableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{
continue;
}
@@ -512,8 +579,9 @@ REGISTER_TEST(enqueue_read_image)
aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages)
{
// Filter out usage flags that are not readable on device
if (!isAHBUsageReadable(usage.usageFlags))
if (!(isAHBUsageReadableHost(usage.usageFlags)
&& isAHBUsageWritableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{
continue;
}
@@ -690,8 +758,9 @@ REGISTER_TEST(enqueue_copy_image)
aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages)
{
// Filter out usage flags that are not readable on device
if (!isAHBUsageReadable(usage.usageFlags))
if (!(isAHBUsageReadableHost(usage.usageFlags)
&& isAHBUsageWritableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{
continue;
}
@@ -993,8 +1062,9 @@ REGISTER_TEST(enqueue_copy_image_to_buffer)
aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages)
{
// Filter out usage flags that are not readable on device
if (!isAHBUsageReadable(usage.usageFlags))
if (!(isAHBUsageReadableHost(usage.usageFlags)
&& isAHBUsageWritableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{
continue;
}
@@ -1181,8 +1251,8 @@ REGISTER_TEST(enqueue_copy_buffer_to_image)
aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages)
{
// Filter out usage flags that are not readable on device
if (!isAHBUsageReadable(usage.usageFlags))
if (!(isAHBUsageReadableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{
continue;
}
@@ -1222,8 +1292,8 @@ REGISTER_TEST(enqueue_copy_buffer_to_image)
imageInfo.type = format.clMemObjectType;
imageInfo.width = resolution.width;
imageInfo.height = resolution.height;
imageInfo.rowPitch = resolution.width * resolution.height
* pixelSize; // data is tightly packed in buffer
// data is tightly packed in buffer
imageInfo.rowPitch = resolution.width * pixelSize;
test_assert_error(imageInfo.rowPitch
>= pixelSize * imageInfo.width,
"Row pitch is smaller than width");
@@ -1376,8 +1446,8 @@ REGISTER_TEST(enqueue_write_image)
aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages)
{
// Filter out usage flags that are not readable on device
if (!isAHBUsageReadable(usage.usageFlags))
if (!(isAHBUsageReadableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{
continue;
}
@@ -1429,8 +1499,8 @@ REGISTER_TEST(enqueue_write_image)
imageInfo.type = format.clMemObjectType;
imageInfo.width = resolution.width;
imageInfo.height = resolution.height;
imageInfo.rowPitch = resolution.width * resolution.height
* pixelSize; // Data is tightly packed
// Data is tightly packed
imageInfo.rowPitch = resolution.width * pixelSize;
test_assert_error(imageInfo.rowPitch
>= pixelSize * imageInfo.width,
"Row pitch is smaller than width");
@@ -1568,8 +1638,8 @@ REGISTER_TEST(enqueue_fill_image)
aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages)
{
// Filter out usage flags that are not readable on device
if (!isAHBUsageReadable(usage.usageFlags))
if (!(isAHBUsageReadableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{
continue;
}
@@ -1620,8 +1690,8 @@ REGISTER_TEST(enqueue_fill_image)
imageInfo.type = format.clMemObjectType;
imageInfo.width = resolution.width;
imageInfo.height = resolution.height;
imageInfo.rowPitch = resolution.width * resolution.height
* pixelSize; // Data is tightly packed
imageInfo.rowPitch = resolution.width * pixelSize;
// Data is tightly packed
test_assert_error(imageInfo.rowPitch
>= pixelSize * imageInfo.width,
"Row pitch is smaller than width");
@@ -1857,3 +1927,412 @@ REGISTER_TEST(blob)
return TEST_PASS;
}
/*
* For cl buffer and cl image
* Create a AHB
* Create a mem object from the AHB
* Release the AHB
* Read and write using the mem object
* Verify reads and writes
*/
REGISTER_TEST(lifetime_buffer)
{
    // Checks that a CL buffer imported from an AHardwareBuffer remains
    // usable for reads and writes after the test's own AHB reference is
    // released: the CL mem object must keep the underlying memory alive.
    REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer");

    cl_int err;
    constexpr cl_uint buffer_size = 4096;
    std::vector<uint8_t> host_buffer(buffer_size, 1);
    clMemWrapper imported_buffer;
    {
        // Check if AHB descriptors for buffers and images are supported
        AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
        aHardwareBufferDesc.width = buffer_size;
        aHardwareBufferDesc.height = 1;
        aHardwareBufferDesc.layers = 1;
        aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
        aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
            | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
        if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
        {
            log_unsupported_ahb_format(aHardwareBufferDesc);
            return TEST_SKIPPED_ITSELF;
        }

        log_info("Testing buffer lifetime\n");

        AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);

        const cl_mem_properties props[] = {
            CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
            aHardwareBuffer.get_props(),
            0,
        };

        imported_buffer = clCreateBufferWithProperties(
            context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
        test_error(err, "Failed to create CL buffer from AHardwareBuffer");

        // Fill AHB buffer
        void *data_ptr = nullptr;
        int ahb_result = AHardwareBuffer_lock(
            aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, nullptr,
            &data_ptr);
        if (ahb_result != 0)
        {
            log_error("AHardwareBuffer_lock failed with code %d\n", ahb_result);
            return TEST_FAIL;
        }
        memcpy(data_ptr, host_buffer.data(), buffer_size);
        ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
        if (ahb_result != 0)
        {
            log_error("AHardwareBuffer_unlock failed with code %d\n",
                      ahb_result);
            return TEST_FAIL;
        }
    } // Release test scope reference to AHB

    // Verify buffer read by comparing to host buffer
    std::vector<uint8_t> read_buffer(buffer_size);
    err = clEnqueueReadBuffer(queue, imported_buffer, true, 0, buffer_size,
                              read_buffer.data(), 0, nullptr, nullptr);
    test_error(err, "failed clEnqueueReadBuffer");
    for (size_t i = 0; i < buffer_size; i++)
    {
        if (read_buffer[i] != host_buffer[i])
        {
            log_error("At position %zu expected value: %u but got value: %u\n",
                      i, host_buffer[i], read_buffer[i]);
            return TEST_FAIL;
        }
    }

    // Attempt buffer write
    clProgramWrapper program;
    clKernelWrapper kernel;
    err = create_single_kernel_helper(context, &program, &kernel, 1,
                                      &lifetime_kernel_source,
                                      "increment_buffer");
    test_error(err, "kernel creation failed");
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_buffer);
    test_error(err, "clSetKernelArg failed");
    size_t gws[1] = { buffer_size };
    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, gws, nullptr, 0,
                                 nullptr, nullptr);
    test_error(err, "Failed clEnqueueNDRangeKernel");

    // Verify write
    err = clEnqueueReadBuffer(queue, imported_buffer, true, 0, buffer_size,
                              read_buffer.data(), 0, nullptr, nullptr);
    test_error(err, "failed clEnqueueReadBuffer");
    for (size_t i = 0; i < buffer_size; i++)
    {
        if (read_buffer[i]
            != host_buffer[i] + 1) // Kernel incremented each index by 1
        {
            // Log the actual expected value (host value + 1); previously
            // this message printed the un-incremented host value.
            log_error("At position %zu expected value: %u but got value: %u\n",
                      i, host_buffer[i] + 1, read_buffer[i]);
            return TEST_FAIL;
        }
    }
    return TEST_PASS;
}
REGISTER_TEST(lifetime_image)
{
    REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer");
    int err;
    const AHardwareBuffer_Format aHardwareBufferFormat =
        AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
    const cl_image_format clImageFormat = { CL_RGBA, CL_UNORM_INT8 };
    const size_t pixel_size = get_pixel_size(&clImageFormat);
    for (auto resolution : test_sizes)
    {
        const size_t image_size =
            resolution.width * resolution.height * pixel_size;
        // Number of bytes of pixel data in one row (each RGBA pixel spans
        // pixel_size bytes).
        const size_t row_bytes = resolution.width * pixel_size;
        std::vector<uint8_t> host_image_data(image_size, 1);
        clMemWrapper imported_image;
        {
            // Check if AHB descriptors for buffers and images are supported
            AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
            aHardwareBufferDesc.width = resolution.width;
            aHardwareBufferDesc.height = resolution.height;
            aHardwareBufferDesc.layers = 1;
            aHardwareBufferDesc.format = aHardwareBufferFormat;
            aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
                | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
            if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
            {
                log_unsupported_ahb_format(aHardwareBufferDesc);
                continue;
            }
            log_info("Testing image lifetime\n");
            AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
            const cl_mem_properties props_image[] = {
                CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
                aHardwareBuffer.get_props(),
                0,
            };
            // Format and descriptor are NULL: both are derived from the
            // imported AHardwareBuffer.
            imported_image = clCreateImageWithProperties(
                context, props_image, CL_MEM_READ_WRITE, nullptr, nullptr,
                nullptr, &err);
            test_error(err, "Failed to create CL image from AHardwareBuffer");
            void *data_ptr = nullptr;
            int ahb_result = AHardwareBuffer_lock(
                aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
                nullptr, &data_ptr);
            if (ahb_result != 0)
            {
                log_error("AHardwareBuffer_lock failed with code %d\n",
                          ahb_result);
                return TEST_FAIL;
            }
            memcpy(data_ptr, host_image_data.data(), image_size);
            ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
            if (ahb_result != 0)
            {
                log_error("AHardwareBuffer_unlock failed with code %d\n",
                          ahb_result);
                return TEST_FAIL;
            }
        } // Release test scope reference to AHB
        // Verify image read using host data
        size_t origin[3] = { 0, 0, 0 };
        size_t region[3] = { resolution.width, resolution.height, 1 };
        size_t row_pitch;
        uint8_t *mapped_image_ptr = static_cast<uint8_t *>(clEnqueueMapImage(
            queue, imported_image, true, CL_MAP_READ, origin, region,
            &row_pitch, nullptr, 0, nullptr, nullptr, &err));
        test_error(err, "clEnqueueMapImage failed");
        for (size_t row = 0; row < resolution.height; ++row)
        {
            // Compare every byte of the row, not just the first `width`
            // bytes: each pixel occupies pixel_size bytes.
            for (size_t col = 0; col < row_bytes; ++col)
            {
                size_t mapped_image_idx = row * row_pitch + col;
                size_t host_image_idx = row * row_bytes + col;
                if (mapped_image_ptr[mapped_image_idx]
                    != host_image_data[host_image_idx])
                {
                    log_error(
                        "At position (%zu, %zu) expected value: %u but got "
                        "value: %u\n",
                        row, col, host_image_data[host_image_idx],
                        mapped_image_ptr[mapped_image_idx]);
                    return TEST_FAIL;
                }
            }
        }
        err = clEnqueueUnmapMemObject(queue, imported_image, mapped_image_ptr,
                                      0, nullptr, nullptr);
        test_error(err, "clEnqueueUnmapMemObject failed");
        err = clFinish(queue);
        test_error(err, "clFinish failed");
        // Attempt image write
        clProgramWrapper program;
        clKernelWrapper kernel;
        err = create_single_kernel_helper(context, &program, &kernel, 1,
                                          &lifetime_kernel_source,
                                          "set_image_color");
        test_error(err, "kernel creation failed");
        err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_image);
        test_error(err, "clSetKernelArg failed");
        cl_float4 color = { { 0.5f, 0.5f, 0.5f, 0.5f } };
        err = clSetKernelArg(kernel, 1, sizeof(cl_float4), &color);
        test_error(err, "clSetKernelArg failed");
        std::vector<size_t> gws = { resolution.width, resolution.height };
        err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, gws.data(),
                                     nullptr, 0, nullptr, nullptr);
        test_error(err, "Failed clEnqueueNDRangeKernel");
        err = clFinish(queue);
        test_error(err, "clFinish failed");
        // Verify image write
        mapped_image_ptr = static_cast<uint8_t *>(clEnqueueMapImage(
            queue, imported_image, true, CL_MAP_READ, origin, region,
            &row_pitch, nullptr, 0, nullptr, nullptr, &err));
        test_error(err, "clEnqueueMapImage failed");
        for (size_t row = 0; row < resolution.height; ++row)
        {
            // Check all channel bytes of every pixel in the row.
            for (size_t col = 0; col < row_bytes; ++col)
            {
                size_t mapped_image_idx = row * row_pitch + col;
                // 0.5f stored as CL_UNORM_INT8 converts to 128 in each
                // channel (round-to-nearest-even of 127.5).
                if (128 != mapped_image_ptr[mapped_image_idx])
                {
                    log_error(
                        "At position (%zu, %zu) expected value: %u but got "
                        "value: %u\n",
                        row, col, 128, mapped_image_ptr[mapped_image_idx]);
                    return TEST_FAIL;
                }
            }
        }
        err = clEnqueueUnmapMemObject(queue, imported_image, mapped_image_ptr,
                                      0, nullptr, nullptr);
        test_error(err, "clEnqueueUnmapMemObject failed");
        err = clFinish(queue);
        test_error(err, "clFinish failed");
    }
    return TEST_PASS;
}
/* Testing clCreateSubBuffer
 * Create an AHB
 * Write random data to the AHB
 * Create a CL buffer from the AHB
 * Create a sub-buffer covering the first half of the buffer
 * Read & verify the sub-buffer contents
 */
REGISTER_TEST(sub_buffer)
{
    // Use the same skip mechanism as the sibling tests in this file
    // (lifetime_buffer / lifetime_image) instead of a hand-rolled check.
    REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer");
    cl_int err;
    RandomSeed seed(gRandomSeed);
    AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
    for (auto usage : test_buffer_usages)
    {
        // The test needs host read+write access (to seed and verify data)
        // and device read access (to read through the sub-buffer).
        if (!(isAHBUsageReadableHost(usage.usageFlags)
              && isAHBUsageWritableHost(usage.usageFlags)
              && isAHBUsageReadableDevice(usage.usageFlags)))
        {
            continue;
        }
        aHardwareBufferDesc.usage = usage.usageFlags;
        for (uint32_t buffer_size : test_buffer_sizes)
        {
            if (buffer_size > getMaxAllocSize(device))
            {
                continue;
            }
            aHardwareBufferDesc.width = buffer_size;
            aHardwareBufferDesc.height = 1;
            aHardwareBufferDesc.layers = 1;
            if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
            {
                log_unsupported_ahb_format(aHardwareBufferDesc);
                continue;
            }
            AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
            log_info("Testing usage: %s, buffer size: %u\n",
                     ahardwareBufferDecodeUsageFlagsToString(usage.usageFlags)
                         .c_str(),
                     buffer_size);
            // Fill the AHB with random reference data from the host side.
            void *hardware_buffer_data = nullptr;
            int ahb_result = AHardwareBuffer_lock(
                aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY, -1,
                nullptr, &hardware_buffer_data);
            if (ahb_result != 0)
            {
                log_error("AHardwareBuffer_lock failed with code %d\n",
                          ahb_result);
                return TEST_FAIL;
            }
            std::vector<uint8_t> host_buffer(buffer_size);
            generate_random_data(ExplicitType::kUnsignedChar, buffer_size, seed,
                                 host_buffer.data());
            memcpy(hardware_buffer_data, host_buffer.data(), buffer_size);
            ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
            if (ahb_result != 0)
            {
                log_error("AHardwareBuffer_unlock failed with code %d\n",
                          ahb_result);
                return TEST_FAIL;
            }
            cl_mem_properties props[] = {
                CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
                aHardwareBuffer.get_props(), 0
            };
            clMemWrapper buffer = clCreateBufferWithProperties(
                context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
            test_error(err, "Failed to create CL buffer from AHardwareBuffer");
            // Create a sub-buffer over the first half of the imported buffer
            // (origin 0 satisfies CL_DEVICE_MEM_BASE_ADDR_ALIGN).
            cl_uint sub_buffer_size = buffer_size / 2;
            cl_buffer_region region = { 0 };
            region.origin = 0;
            region.size = sub_buffer_size;
            clMemWrapper sub_buffer =
                clCreateSubBuffer(buffer, CL_MEM_READ_WRITE,
                                  CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
            test_error(err, "clCreateSubBuffer failed");
            std::vector<uint8_t> host_sub_buffer(sub_buffer_size);
            err = clEnqueueReadBuffer(queue, sub_buffer, true, 0,
                                      sub_buffer_size, host_sub_buffer.data(),
                                      0, nullptr, nullptr);
            test_error(err, "clEnqueueReadBuffer failed");
            for (size_t i = 0; i < sub_buffer_size; ++i)
            {
                if (host_buffer[i] != host_sub_buffer[i])
                {
                    log_error(
                        "At position i=%zu expected value %u but got %u\n", i,
                        host_buffer[i], host_sub_buffer[i]);
                    return TEST_FAIL;
                }
            }
        }
    }
    return TEST_PASS;
}

View File

@@ -3,7 +3,6 @@ set(MODULE_NAME CL_KHR_EXTERNAL_SEMAPHORE)
set(${MODULE_NAME}_SOURCES
main.cpp
test_external_semaphore.cpp
test_external_semaphore_sync_fd.cpp
)
set (CLConform_VULKAN_LIBRARIES_DIR "${VULKAN_LIB_DIR}")
@@ -18,7 +17,6 @@ include_directories (${CLConform_INCLUDE_DIR})
list(APPEND CLConform_LIBRARIES vulkan_wrapper)
set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
set(CMAKE_CXX_FLAGS "-fpermissive")
include_directories("../../common/vulkan_wrapper")

View File

@@ -197,8 +197,11 @@ REGISTER_TEST_VERSION(external_semaphores_queries, Version(1, 2))
return TEST_PASS;
}
REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
cl_int doTestImportExport(cl_device_id device, cl_context contexts[2],
cl_command_queue queues[2])
{
cl_int err = CL_SUCCESS;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
@@ -210,7 +213,6 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
std::vector<cl_external_semaphore_handle_type_khr> import_handle_types;
std::vector<cl_external_semaphore_handle_type_khr> export_handle_types;
cl_int err = CL_SUCCESS;
err = get_device_semaphore_handle_types(
device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
import_handle_types);
@@ -237,17 +239,9 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
export_handle_types.begin(), export_handle_types.end(),
std::back_inserter(import_export_handle_types));
cl_context context2 =
clCreateContext(NULL, 1, &device, notify_callback, NULL, &err);
test_error(err, "Failed to create context2");
clCommandQueueWrapper queue1 =
clCreateCommandQueue(context, device, 0, &err);
test_error(err, "Could not create command queue");
clCommandQueueWrapper queue2 =
clCreateCommandQueue(context2, device, 0, &err);
test_error(err, "Could not create command queue");
cl_context& context2 = contexts[1];
cl_command_queue& queue1 = queues[0];
cl_command_queue& queue2 = queues[1];
if (import_export_handle_types.empty())
{
@@ -270,7 +264,7 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
// Signal semaphore on context1
cl_semaphore_khr exportable_semaphore =
clCreateSemaphoreWithPropertiesKHR(context, export_props, &err);
clCreateSemaphoreWithPropertiesKHR(contexts[0], export_props, &err);
test_error(err, "Failed to create exportable semaphore");
err = clEnqueueSignalSemaphoresKHR(queue1, 1, &exportable_semaphore,
@@ -313,12 +307,77 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
test_error(err, "Failed to release semaphore");
}
err = clReleaseContext(context2);
test_error(err, "Failed to release context2");
return TEST_PASS;
}
REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
{
cl_int err = CL_SUCCESS;
clContextWrapper context_sec =
clCreateContext(NULL, 1, &device, notify_callback, NULL, &err);
test_error(err, "Failed to create context2");
cl_context contexts[2] = { context, context_sec };
clCommandQueueWrapper queue0 =
clCreateCommandQueue(context, device, 0, &err);
test_error(err, "Could not create command queue");
clCommandQueueWrapper queue1 =
clCreateCommandQueue(contexts[1], device, 0, &err);
test_error(err, "Could not create command queue");
cl_command_queue queues[2] = { queue0, queue1 };
return doTestImportExport(device, contexts, queues);
}
REGISTER_TEST_VERSION(external_semaphores_import_export, Version(1, 2))
{
cl_int err = CL_SUCCESS;
cl_int total_status = TEST_PASS;
// test external semaphores with out-of-order queue
{
cl_command_queue_properties device_props = 0;
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(device_props), &device_props, NULL);
test_error(err,
"clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0)
{
// Create ooo queue
clCommandQueueWrapper test_queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
test_error(err, "Could not create command queue");
cl_command_queue queues[2] = { test_queue, test_queue };
cl_context contexts[2] = { context, context };
cl_int status = doTestImportExport(device, contexts, queues);
if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
{
total_status = TEST_FAIL;
}
}
}
// test external semaphore sync fd with in-order harness queue
{
cl_command_queue queues[2] = { queue, queue };
cl_context contexts[2] = { context, context };
cl_int status = doTestImportExport(device, contexts, queues);
if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
{
total_status = TEST_FAIL;
}
}
return total_status;
}
// Confirm that a signal followed by a wait will complete successfully
REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
{
@@ -366,9 +425,14 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
// Signal semaphore
clEventWrapper signal_event;
err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
nullptr, 0, nullptr, &signal_event);
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
err = clEnqueueSignalSemaphoresKHR(
queue, 1, &sema_ext.getCLSemaphore(), nullptr, 0, nullptr,
&signal_event);
test_error(err, "Could not signal semaphore");
}
// Wait semaphore
clEventWrapper wait_event;
@@ -381,7 +445,11 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
test_error(err, "Could not finish queue");
// Ensure all events are completed
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
test_assert_event_complete(signal_event);
}
test_assert_event_complete(wait_event);
}
@@ -405,6 +473,7 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
// Obtain pointers to semaphore's API
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
GET_PFN(device, clReImportSemaphoreSyncFdKHR);
std::vector<VulkanExternalSemaphoreHandleType>
vkExternalSemaphoreHandleTypeList =
@@ -448,11 +517,15 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
err = clEnqueueTask(queue, kernel, 0, nullptr, &task_events[0]);
test_error(err, "Unable to enqueue task_1");
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
// Signal semaphore (dependency on task_1)
err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
nullptr, 1, &task_events[0],
&signal_events[0]);
err = clEnqueueSignalSemaphoresKHR(
queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1,
&task_events[0], &signal_events[0]);
test_error(err, "Could not signal semaphore");
}
// In a loop
size_t loop;
@@ -473,12 +546,22 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
err = clWaitForEvents(1, &wait_events[loop - 1]);
test_error(err, "Unable to wait for wait semaphore to complete");
if (vkExternalSemaphoreHandleType
== VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
err = clReImportSemaphoreSyncFdKHR(sema_ext.getCLSemaphore(),
nullptr, -1);
test_error(err, "Could not reimport semaphore sync fd");
}
else
{
// Signal semaphore (dependency on task_loop)
err = clEnqueueSignalSemaphoresKHR(
queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1,
&task_events[loop], &signal_events[loop]);
test_error(err, "Could not signal semaphore");
}
}
// Wait semaphore
err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
@@ -494,7 +577,11 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
for (loop = 0; loop < loop_count; ++loop)
{
test_assert_event_complete(wait_events[loop]);
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
test_assert_event_complete(signal_events[loop]);
}
test_assert_event_complete(task_events[loop]);
}
}
@@ -536,6 +623,19 @@ static int external_semaphore_cross_queue_helper(cl_device_id device,
for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
vkExternalSemaphoreHandleTypeList)
{
if (vkExternalSemaphoreHandleType
== VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
std::stringstream log_message;
log_message
<< "Skipping semaphore type: \""
<< vkExternalSemaphoreHandleType
<< "\"; it cannot be signaled from OpenCL when imported."
<< std::endl;
log_info("%s", log_message.str().c_str());
continue;
}
log_info_semaphore_type(vkExternalSemaphoreHandleType);
VulkanSemaphore vkVk2CLSemaphore(vkDevice,
vkExternalSemaphoreHandleType);
@@ -668,10 +768,14 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
// Signal semaphore 1
clEventWrapper signal_1_event;
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
err = clEnqueueSignalSemaphoresKHR(
queue1, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr,
&signal_1_event);
test_error(err, "Could not signal semaphore");
}
// Wait semaphore 1
clEventWrapper wait_1_event;
@@ -682,10 +786,14 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
// Signal semaphore 2
clEventWrapper signal_2_event;
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
err = clEnqueueSignalSemaphoresKHR(
queue2, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr,
&signal_2_event);
test_error(err, "Could not signal semaphore");
}
// Wait semaphore 2
clEventWrapper wait_2_event;
@@ -702,8 +810,12 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
test_error(err, "Could not finish queue");
// Ensure all events are completed
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
test_assert_event_complete(signal_1_event);
test_assert_event_complete(signal_2_event);
}
test_assert_event_complete(wait_1_event);
test_assert_event_complete(wait_2_event);
}
@@ -741,6 +853,19 @@ REGISTER_TEST_VERSION(external_semaphores_multi_signal, Version(1, 2))
for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
vkExternalSemaphoreHandleTypeList)
{
if (vkExternalSemaphoreHandleType
== VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
std::stringstream log_message;
log_message
<< "Skipping semaphore type: \""
<< vkExternalSemaphoreHandleType
<< "\"; it cannot be signaled from OpenCL when imported."
<< std::endl;
log_info("%s", log_message.str().c_str());
continue;
}
log_info_semaphore_type(vkExternalSemaphoreHandleType);
VulkanSemaphore vkVk2CLSemaphore1(vkDevice,
vkExternalSemaphoreHandleType);
@@ -842,19 +967,23 @@ REGISTER_TEST_VERSION(external_semaphores_multi_wait, Version(1, 2))
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
test_error(err, "Could not create command queue");
// Signal semaphore 1
clEventWrapper signal_1_event;
err =
clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(),
nullptr, 0, nullptr, &signal_1_event);
clEventWrapper signal_2_event;
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
// Signal semaphore 1
err = clEnqueueSignalSemaphoresKHR(
queue, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr,
&signal_1_event);
test_error(err, "Could not signal semaphore");
// Signal semaphore 2
clEventWrapper signal_2_event;
err =
clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_2.getCLSemaphore(),
nullptr, 0, nullptr, &signal_2_event);
err = clEnqueueSignalSemaphoresKHR(
queue, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr,
&signal_2_event);
test_error(err, "Could not signal semaphore");
}
// Wait semaphore 1 and 2
clEventWrapper wait_event;
@@ -869,8 +998,12 @@ REGISTER_TEST_VERSION(external_semaphores_multi_wait, Version(1, 2))
test_error(err, "Could not finish queue");
// Ensure all events are completed
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
test_assert_event_complete(signal_1_event);
test_assert_event_complete(signal_2_event);
}
test_assert_event_complete(wait_event);
}

View File

@@ -1,131 +0,0 @@
//
// Copyright (c) 2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/typeWrappers.h"
#include "harness/extensionHelpers.h"
#include "harness/errorHelpers.h"
// Test it is possible to export a semaphore to a sync fd and import the same
// sync fd to a new semaphore
REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2))
{
cl_int err = CL_SUCCESS;
if (!is_extension_available(device, "cl_khr_external_semaphore"))
{
log_info(
"cl_khr_external_semaphore is not supported on this platoform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
if (!is_extension_available(device, "cl_khr_external_semaphore_sync_fd"))
{
log_info("cl_khr_external_semaphore_sync_fd is not supported on this "
"platoform. Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
cl_command_queue_properties device_props = 0;
err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(device_props), &device_props, NULL);
test_error(err, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0)
{
log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not "
"supported. Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
// Create ooo queue
clCommandQueueWrapper test_queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
test_error(err, "Could not create command queue");
// Create semaphore
cl_semaphore_properties_khr sema_1_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_HANDLE_SYNC_FD_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
0
};
cl_semaphore_khr sema_1 =
clCreateSemaphoreWithPropertiesKHR(context, sema_1_props, &err);
test_error(err, "Could not create semaphore");
// Signal semaphore
clEventWrapper signal_event;
err = clEnqueueSignalSemaphoresKHR(test_queue, 1, &sema_1, nullptr, 0,
nullptr, &signal_event);
test_error(err, "Could not signal semaphore");
// Extract sync fd
int handle = -1;
size_t handle_size;
err = clGetSemaphoreHandleForTypeKHR(sema_1, device,
CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
sizeof(handle), &handle, &handle_size);
test_error(err, "Could not extract semaphore handle");
test_assert_error(sizeof(handle) == handle_size, "Invalid handle size");
test_assert_error(handle >= 0, "Invalid handle");
// Create semaphore from sync fd
cl_semaphore_properties_khr sema_2_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
static_cast<cl_semaphore_properties_khr>(handle), 0
};
cl_semaphore_khr sema_2 =
clCreateSemaphoreWithPropertiesKHR(context, sema_2_props, &err);
test_error(err, "Could not create semaphore");
// Wait semaphore
clEventWrapper wait_event;
err = clEnqueueWaitSemaphoresKHR(test_queue, 1, &sema_2, nullptr, 0,
nullptr, &wait_event);
test_error(err, "Could not wait semaphore");
// Finish
err = clFinish(test_queue);
test_error(err, "Could not finish queue");
// Check all events are completed
test_assert_event_complete(signal_event);
test_assert_event_complete(wait_event);
// Release semaphore
err = clReleaseSemaphoreKHR(sema_1);
test_error(err, "Could not release semaphore");
err = clReleaseSemaphoreKHR(sema_2);
test_error(err, "Could not release semaphore");
return TEST_PASS;
}

View File

@@ -17,6 +17,7 @@
int main(int argc, const char *argv[])
{
return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
return runTestHarness(
argc, argv, static_cast<int>(test_registry::getInstance().num_tests()),
test_registry::getInstance().definitions(), false, 0);
}

View File

@@ -21,81 +21,91 @@
#include "harness/errorHelpers.h"
#include "directx_wrapper.hpp"
class CLDXSemaphoreWrapper {
public:
CLDXSemaphoreWrapper(cl_device_id device, cl_context context,
ID3D12Device* dx_device)
: device(device), context(context), dx_device(dx_device){};
int createSemaphoreFromFence(ID3D12Fence* fence)
struct DXFenceTestBase
{
DXFenceTestBase(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int num_elems)
: device(device), context(context), queue(queue), num_elems(num_elems)
{}
virtual ~DXFenceTestBase()
{
cl_int errcode = CL_SUCCESS;
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
const HRESULT hr = dx_device->CreateSharedHandle(
fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
test_error(FAILED(hr), "Failed to get shared handle from D3D12 fence");
cl_semaphore_properties_khr sem_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
reinterpret_cast<cl_semaphore_properties_khr>(fence_handle), 0
};
semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
test_error(errcode, "Could not create semaphore");
return CL_SUCCESS;
}
~CLDXSemaphoreWrapper()
{
releaseSemaphore();
if (fence_handle)
{
CloseHandle(fence_handle);
fence_handle = nullptr;
}
};
const cl_semaphore_khr* operator&() const { return &semaphore; };
cl_semaphore_khr operator*() const { return semaphore; };
HANDLE getHandle() const { return fence_handle; };
private:
cl_semaphore_khr semaphore;
ComPtr<ID3D12Fence> fence;
HANDLE fence_handle;
cl_device_id device;
cl_context context;
ComPtr<ID3D12Device> dx_device;
int releaseSemaphore() const
if (fence_wrapper)
{
GET_PFN(device, clReleaseSemaphoreKHR);
delete fence_wrapper;
fence_wrapper = nullptr;
}
if (semaphore)
{
clReleaseSemaphoreKHR(semaphore);
semaphore = nullptr;
}
};
virtual int SetUp()
{
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_FUNCTION_EXTENSION_ADDRESS(device,
clCreateSemaphoreWithPropertiesKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clReleaseSemaphoreKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueSignalSemaphoresKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueWaitSemaphoresKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreHandleForTypeKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clRetainSemaphoreKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreInfoKHR);
test_error(
!is_import_handle_available(CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
fence_wrapper = new DirectXFenceWrapper(dx_wrapper.getDXDevice());
semaphore = createSemaphoreFromFence(fence_wrapper->get());
test_assert_error(!!semaphore, "Could not create semaphore");
return TEST_PASS;
}
return CL_SUCCESS;
}
};
virtual cl_int Run() = 0;
static bool
is_import_handle_available(cl_device_id device,
const cl_external_memory_handle_type_khr handle_type)
{
protected:
int errcode = CL_SUCCESS;
cl_device_id device = nullptr;
cl_context context = nullptr;
cl_command_queue queue = nullptr;
cl_int num_elems = 0;
DirectXWrapper dx_wrapper;
cl_semaphore_payload_khr semaphore_payload = 1;
cl_semaphore_khr semaphore = nullptr;
HANDLE fence_handle = nullptr;
DirectXFenceWrapper *fence_wrapper = nullptr;
clCreateSemaphoreWithPropertiesKHR_fn clCreateSemaphoreWithPropertiesKHR =
nullptr;
clEnqueueSignalSemaphoresKHR_fn clEnqueueSignalSemaphoresKHR = nullptr;
clEnqueueWaitSemaphoresKHR_fn clEnqueueWaitSemaphoresKHR = nullptr;
clReleaseSemaphoreKHR_fn clReleaseSemaphoreKHR = nullptr;
clGetSemaphoreInfoKHR_fn clGetSemaphoreInfoKHR = nullptr;
clRetainSemaphoreKHR_fn clRetainSemaphoreKHR = nullptr;
clGetSemaphoreHandleForTypeKHR_fn clGetSemaphoreHandleForTypeKHR = nullptr;
[[nodiscard]] bool is_import_handle_available(
const cl_external_memory_handle_type_khr handle_type)
{
size_t import_types_size = 0;
errcode =
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, 0,
nullptr, &import_types_size);
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
0, nullptr, &import_types_size);
if (errcode != CL_SUCCESS)
{
log_error("Could not query import semaphore handle types");
@@ -114,4 +124,46 @@ is_import_handle_available(cl_device_id device,
return std::find(import_types.begin(), import_types.end(), handle_type)
!= import_types.end();
}
cl_semaphore_khr createSemaphoreFromFence(ID3D12Fence *src_fence)
{
const HRESULT hr = dx_wrapper.getDXDevice()->CreateSharedHandle(
src_fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
if (FAILED(hr)) return nullptr;
const cl_semaphore_properties_khr sem_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
reinterpret_cast<cl_semaphore_properties_khr>(fence_handle), 0
};
cl_semaphore_khr tmp_semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
if (errcode != CL_SUCCESS) return nullptr;
return tmp_semaphore;
}
};
template <class T>
int MakeAndRunTest(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int nelems)
{
cl_int status = TEST_PASS;
try
{
auto test_fixture = T(device, context, queue, nelems);
status = test_fixture.SetUp();
if (status != TEST_PASS) return status;
status = test_fixture.Run();
} catch (const std::runtime_error &e)
{
log_error("%s", e.what());
return TEST_FAIL;
}
return status;
}

View File

@@ -16,37 +16,17 @@
#include "semaphore_dx_fence_base.h"
// Confirm that a signal followed by a wait in OpenCL will complete successfully
REGISTER_TEST(test_external_semaphores_signal_wait)
struct SignalWait final : DXFenceTestBase
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
using DXFenceTestBase::DXFenceTestBase;
cl_int Run() override
{
log_info("Calling clEnqueueSignalSemaphoresKHR\n");
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
clEventWrapper signal_event;
errcode = clEnqueueSignalSemaphoresKHR(
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore,
&semaphore_payload, 0, nullptr,
&signal_event);
test_error(errcode, "Failed to signal semaphore");
log_info("Calling clEnqueueWaitSemaphoresKHR\n");
@@ -63,44 +43,29 @@ REGISTER_TEST(test_external_semaphores_signal_wait)
test_assert_event_complete(wait_event);
return TEST_PASS;
}
};
// Confirm that a signal followed by a wait in OpenCL will complete successfully
REGISTER_TEST(test_external_semaphores_signal_wait)
{
return MakeAndRunTest<SignalWait>(device, context, queue, num_elements);
}
// Confirm that a wait in OpenCL followed by a CPU signal in DX12 will complete
// successfully
REGISTER_TEST(test_external_semaphores_signal_dx_cpu)
struct SignalDXCPU final : DXFenceTestBase
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
using DXFenceTestBase::DXFenceTestBase;
cl_int Run() override
{
log_info("Calling clEnqueueWaitSemaphoresKHR\n");
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
clEventWrapper wait_event;
errcode = clEnqueueWaitSemaphoresKHR(
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
log_info("Calling d3d12_fence->Signal()\n");
const HRESULT hr = (*fence)->Signal(semaphore_payload);
const HRESULT hr = fence_wrapper->get()->Signal(semaphore_payload);
test_error(FAILED(hr), "Failed to signal D3D12 fence");
errcode = clFinish(queue);
@@ -109,45 +74,31 @@ REGISTER_TEST(test_external_semaphores_signal_dx_cpu)
test_assert_event_complete(wait_event);
return TEST_PASS;
}
};
// Confirm that a wait in OpenCL followed by a CPU signal in DX12 will complete
// successfully
REGISTER_TEST(test_external_semaphores_signal_dx_cpu)
{
return MakeAndRunTest<SignalDXCPU>(device, context, queue, num_elements);
}
// Confirm that a wait in OpenCL followed by a GPU signal in DX12 will complete
// successfully
REGISTER_TEST(test_external_semaphores_signal_dx_gpu)
struct SignalDXGPU final : DXFenceTestBase
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
using DXFenceTestBase::DXFenceTestBase;
cl_int Run() override
{
log_info("Calling clEnqueueWaitSemaphoresKHR\n");
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
clEventWrapper wait_event;
errcode = clEnqueueWaitSemaphoresKHR(
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
log_info("Calling d3d12_command_queue->Signal()\n");
const HRESULT hr =
dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload);
const HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal(
fence_wrapper->get(), semaphore_payload);
test_error(FAILED(hr), "Failed to signal D3D12 fence");
errcode = clFinish(queue);
@@ -156,49 +107,37 @@ REGISTER_TEST(test_external_semaphores_signal_dx_gpu)
test_assert_event_complete(wait_event);
return TEST_PASS;
}
};
// Confirm that a wait in OpenCL followed by a GPU signal in DX12 will complete
// successfully
REGISTER_TEST(test_external_semaphores_signal_dx_gpu)
{
return MakeAndRunTest<SignalDXGPU>(device, context, queue, num_elements);
}
// Confirm that interlocking waits between OpenCL and DX12 will complete
// successfully
REGISTER_TEST(test_external_semaphores_cl_dx_interlock)
struct CLDXInterlock final : DXFenceTestBase
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
using DXFenceTestBase::DXFenceTestBase;
cl_int Run() override
{
log_info("Calling d3d12_command_queue->Wait(1)\n");
cl_semaphore_payload_khr semaphore_payload = 1;
HRESULT hr =
dx_wrapper.getDXCommandQueue()->Wait(*fence, semaphore_payload);
HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper->get(),
semaphore_payload);
test_error(FAILED(hr), "Failed to wait on D3D12 fence");
log_info("Calling d3d12_command_queue->Signal(2)\n");
hr = dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload + 1);
hr = dx_wrapper.getDXCommandQueue()->Signal(fence_wrapper->get(),
semaphore_payload + 1);
test_error(FAILED(hr), "Failed to signal D3D12 fence");
log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n");
clEventWrapper signal_event;
errcode = clEnqueueSignalSemaphoresKHR(
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore,
&semaphore_payload, 0, nullptr,
&signal_event);
test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR");
log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n");
@@ -215,42 +154,53 @@ REGISTER_TEST(test_external_semaphores_cl_dx_interlock)
test_assert_event_complete(signal_event);
return TEST_PASS;
}
};
// Confirm that interlocking waits between OpenCL and DX12 will complete
// successfully
REGISTER_TEST(test_external_semaphores_cl_dx_interlock)
{
return MakeAndRunTest<CLDXInterlock>(device, context, queue, num_elements);
}
// Confirm that multiple waits in OpenCL followed by signals in DX12 and waits
// in DX12 followed by signals in OpenCL complete successfully
REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
struct MultipleWaitSignal final : DXFenceTestBase
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
using DXFenceTestBase::DXFenceTestBase;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
~MultipleWaitSignal() override
{
if (fence_handle_2)
{
CloseHandle(fence_handle_2);
fence_handle_2 = nullptr;
}
if (fence_wrapper_2)
{
delete fence_wrapper_2;
fence_wrapper_2 = nullptr;
}
if (semaphore_2)
{
clReleaseSemaphoreKHR(semaphore_2);
semaphore_2 = nullptr;
}
DXFenceTestBase::~DXFenceTestBase();
};
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
int SetUp() override
{
DXFenceTestBase::SetUp();
fence_wrapper_2 = new DirectXFenceWrapper(dx_wrapper.getDXDevice());
semaphore_2 = createSemaphoreFromFence(fence_wrapper_2->get());
test_assert_error(!!semaphore_2, "Could not create semaphore");
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
return TEST_PASS;
}
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence_1(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore_1(device, context, dx_wrapper.getDXDevice());
test_error(semaphore_1.createSemaphoreFromFence(*fence_1),
"Could not create semaphore");
const DirectXFenceWrapper fence_2(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore_2(device, context, dx_wrapper.getDXDevice());
test_error(semaphore_2.createSemaphoreFromFence(*fence_2),
"Could not create semaphore");
const cl_semaphore_khr semaphore_list[] = { *semaphore_1, *semaphore_2 };
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
cl_int Run() override
{
const cl_semaphore_khr semaphore_list[] = { semaphore, semaphore_2 };
cl_semaphore_payload_khr semaphore_payload_list[] = {
semaphore_payload, semaphore_payload + 1
};
@@ -263,16 +213,20 @@ REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
log_info("Calling d3d12_command_queue->Signal()\n");
HRESULT hr =
dx_wrapper.getDXCommandQueue()->Signal(*fence_2, semaphore_payload + 1);
HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal(
fence_wrapper_2->get(), semaphore_payload + 1);
test_error(FAILED(hr), "Failed to signal D3D12 fence 2");
hr = dx_wrapper.getDXCommandQueue()->Signal(*fence_1, semaphore_payload);
hr = dx_wrapper.getDXCommandQueue()->Signal(fence_wrapper->get(),
semaphore_payload);
test_error(FAILED(hr), "Failed to signal D3D12 fence 1");
log_info("Calling d3d12_command_queue->Wait() with different payloads\n");
hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_1, semaphore_payload + 3);
log_info(
"Calling d3d12_command_queue->Wait() with different payloads\n");
hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper->get(),
semaphore_payload + 3);
test_error(FAILED(hr), "Failed to wait on D3D12 fence 1");
hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_2, semaphore_payload + 2);
hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper_2->get(),
semaphore_payload + 2);
test_error(FAILED(hr), "Failed to wait on D3D12 fence 2");
errcode = clFinish(queue);
@@ -286,29 +240,31 @@ REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
log_info("Calling clEnqueueSignalSemaphoresKHR\n");
clEventWrapper signal_event;
errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list,
semaphore_payload_list, 0, nullptr,
&signal_event);
semaphore_payload_list, 0,
nullptr, &signal_event);
test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR");
// Wait until the GPU has completed commands up to this fence point.
log_info("Waiting for D3D12 command queue completion\n");
if ((*fence_1)->GetCompletedValue() < semaphore_payload_list[0])
if (fence_wrapper->get()->GetCompletedValue()
< semaphore_payload_list[0])
{
const HANDLE event_handle =
CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
hr = (*fence_1)->SetEventOnCompletion(semaphore_payload_list[0],
event_handle);
CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
hr = fence_wrapper->get()->SetEventOnCompletion(
semaphore_payload_list[0], event_handle);
test_error(FAILED(hr),
"Failed to set D3D12 fence 1 event on completion");
WaitForSingleObject(event_handle, INFINITE);
CloseHandle(event_handle);
}
if ((*fence_2)->GetCompletedValue() < semaphore_payload_list[1])
if (fence_wrapper_2->get()->GetCompletedValue()
< semaphore_payload_list[1])
{
const HANDLE event_handle =
CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
hr = (*fence_2)->SetEventOnCompletion(semaphore_payload_list[1],
event_handle);
CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
hr = fence_wrapper_2->get()->SetEventOnCompletion(
semaphore_payload_list[1], event_handle);
test_error(FAILED(hr),
"Failed to set D3D12 fence 2 event on completion");
WaitForSingleObject(event_handle, INFINITE);
@@ -321,4 +277,18 @@ REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
test_assert_event_complete(signal_event);
return TEST_PASS;
}
protected:
cl_semaphore_khr semaphore_2 = nullptr;
HANDLE fence_handle_2 = nullptr;
DirectXFenceWrapper *fence_wrapper_2 = nullptr;
};
// Confirm that multiple waits in OpenCL followed by signals in DX12 and waits
// in DX12 followed by signals in OpenCL complete successfully
REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
{
return MakeAndRunTest<MultipleWaitSignal>(device, context, queue,
num_elements);
}

View File

@@ -16,28 +16,16 @@
#include "semaphore_dx_fence_base.h"
// Confirm that a wait followed by a signal in DirectX 12 using an exported
// semaphore will complete successfully
REGISTER_TEST(test_external_semaphores_export_dx_signal)
struct ExportDXSignal final : DXFenceTestBase
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
GET_PFN(device, clGetSemaphoreInfoKHR);
GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
using DXFenceTestBase::DXFenceTestBase;
int Run() override
{
size_t export_types_size = 0;
errcode =
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
nullptr, &export_types_size);
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
0, nullptr, &export_types_size);
test_error(errcode, "Could not query export semaphore handle types");
std::vector<cl_external_semaphore_handle_type_khr> export_types(
export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
@@ -50,14 +38,16 @@ REGISTER_TEST(test_external_semaphores_export_dx_signal)
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
== export_types.end())
{
log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
log_info(
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
"the supported export types\n");
return TEST_FAIL;
}
constexpr cl_semaphore_properties_khr sem_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
static_cast<cl_semaphore_properties_khr>(
@@ -66,37 +56,37 @@ REGISTER_TEST(test_external_semaphores_export_dx_signal)
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
0
};
cl_semaphore_khr semaphore =
cl_semaphore_khr exportable_semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
test_error(errcode, "Could not create semaphore");
cl_bool is_exportable = CL_FALSE;
errcode =
clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
errcode = clGetSemaphoreInfoKHR(
exportable_semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
sizeof(is_exportable), &is_exportable, nullptr);
test_error(errcode, "Could not get semaphore info");
test_error(!is_exportable, "Semaphore is not exportable");
log_info("Calling clEnqueueWaitSemaphoresKHR\n");
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
clEventWrapper wait_event;
errcode = clEnqueueWaitSemaphoresKHR(
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &exportable_semaphore,
&semaphore_payload, 0, nullptr,
&wait_event);
test_error(errcode, "Failed to wait semaphore");
HANDLE semaphore_handle = nullptr;
errcode = clGetSemaphoreHandleForTypeKHR(
semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
exportable_semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
sizeof(semaphore_handle), &semaphore_handle, nullptr);
test_error(errcode, "Could not get semaphore handle");
ID3D12Fence *fence = nullptr;
errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle,
IID_PPV_ARGS(&fence));
ID3D12Fence *exported_fence = nullptr;
errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(
semaphore_handle, IID_PPV_ARGS(&exported_fence));
test_error(errcode, "Could not open semaphore handle");
log_info("Calling fence->Signal()\n");
const HRESULT hr = fence->Signal(semaphore_payload);
const HRESULT hr = exported_fence->Signal(semaphore_payload);
test_error(FAILED(hr), "Failed to signal D3D12 fence");
errcode = clFinish(queue);
@@ -106,34 +96,31 @@ REGISTER_TEST(test_external_semaphores_export_dx_signal)
// Release resources
CloseHandle(semaphore_handle);
test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
fence->Release();
test_error(clReleaseSemaphoreKHR(exportable_semaphore),
"Could not release semaphore");
exported_fence->Release();
return TEST_PASS;
}
};
// Confirm that a wait followed by a signal in DirectX 12 using an exported
// semaphore will complete successfully
REGISTER_TEST(test_external_semaphores_export_dx_signal)
{
return MakeAndRunTest<ExportDXSignal>(device, context, queue, num_elements);
}
// Confirm that a signal in OpenCL followed by a wait in DirectX 12 using an
// exported semaphore will complete successfully
REGISTER_TEST(test_external_semaphores_export_dx_wait)
struct ExportDXWait final : DXFenceTestBase
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
GET_PFN(device, clGetSemaphoreInfoKHR);
GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
using DXFenceTestBase::DXFenceTestBase;
int Run() override
{
size_t export_types_size = 0;
errcode =
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
nullptr, &export_types_size);
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
0, nullptr, &export_types_size);
test_error(errcode, "Could not query export semaphore handle types");
std::vector<cl_external_semaphore_handle_type_khr> export_types(
export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
@@ -146,14 +133,16 @@ REGISTER_TEST(test_external_semaphores_export_dx_wait)
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
== export_types.end())
{
log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
log_info(
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
"the supported export types\n");
return TEST_FAIL;
}
constexpr cl_semaphore_properties_khr sem_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
static_cast<cl_semaphore_properties_khr>(
@@ -162,13 +151,13 @@ REGISTER_TEST(test_external_semaphores_export_dx_wait)
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
0
};
cl_semaphore_khr semaphore =
cl_semaphore_khr exportable_semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
test_error(errcode, "Could not create semaphore");
cl_bool is_exportable = CL_FALSE;
errcode =
clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
errcode = clGetSemaphoreInfoKHR(
exportable_semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
sizeof(is_exportable), &is_exportable, nullptr);
test_error(errcode, "Could not get semaphore info");
test_error(!is_exportable, "Semaphore is not exportable");
@@ -176,31 +165,33 @@ REGISTER_TEST(test_external_semaphores_export_dx_wait)
log_info("Calling clEnqueueSignalSemaphoresKHR\n");
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
clEventWrapper signal_event;
errcode = clEnqueueSignalSemaphoresKHR(
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &exportable_semaphore,
&semaphore_payload, 0, nullptr,
&signal_event);
test_error(errcode, "Failed to signal semaphore");
HANDLE semaphore_handle = nullptr;
errcode = clGetSemaphoreHandleForTypeKHR(
semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
exportable_semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
sizeof(semaphore_handle), &semaphore_handle, nullptr);
test_error(errcode, "Could not get semaphore handle");
ID3D12Fence *fence = nullptr;
errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle,
IID_PPV_ARGS(&fence));
ID3D12Fence *exported_fence = nullptr;
errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(
semaphore_handle, IID_PPV_ARGS(&exported_fence));
test_error(errcode, "Could not open semaphore handle");
log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n");
HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence, semaphore_payload);
HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(exported_fence,
semaphore_payload);
test_error(FAILED(hr), "Failed to wait on D3D12 fence");
log_info("Calling WaitForSingleObject\n");
if (fence->GetCompletedValue() < semaphore_payload)
if (exported_fence->GetCompletedValue() < semaphore_payload)
{
const HANDLE event =
CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
hr = fence->SetEventOnCompletion(semaphore_payload, event);
CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
hr = exported_fence->SetEventOnCompletion(semaphore_payload, event);
test_error(FAILED(hr), "Failed to set event on completion");
WaitForSingleObject(event, INFINITE);
CloseHandle(event);
@@ -213,8 +204,17 @@ REGISTER_TEST(test_external_semaphores_export_dx_wait)
// Release resources
CloseHandle(semaphore_handle);
test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
fence->Release();
test_error(clReleaseSemaphoreKHR(exportable_semaphore),
"Could not release semaphore");
exported_fence->Release();
return TEST_PASS;
}
};
// Confirm that a signal in OpenCL followed by a wait in DirectX 12 using an
// exported semaphore will complete successfully
REGISTER_TEST(test_external_semaphores_export_dx_wait)
{
return MakeAndRunTest<ExportDXWait>(device, context, queue, num_elements);
}

View File

@@ -16,32 +16,12 @@
#include "semaphore_dx_fence_base.h"
// Confirm that a wait without a semaphore payload list will return
// CL_INVALID_VALUE
REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
struct DXFenceNegativeWait final : DXFenceTestBase
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
using DXFenceTestBase::DXFenceTestBase;
int Run() override
{
log_info("Calling clEnqueueWaitSemaphoresKHR\n");
errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
nullptr, nullptr);
@@ -50,34 +30,23 @@ REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
"Unexpected error code returned from clEnqueueWaitSemaphores");
return TEST_PASS;
}
};
// Confirm that a wait without a semaphore payload list will return
// CL_INVALID_VALUE
REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
{
return MakeAndRunTest<DXFenceNegativeWait>(device, context, queue,
num_elements);
}
// Confirm that a signal without a semaphore payload list will return
// CL_INVALID_VALUE
REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
struct DXFenceNegativeSignal final : DXFenceTestBase
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
using DXFenceTestBase::DXFenceTestBase;
int Run() override
{
log_info("Calling clEnqueueWaitSemaphoresKHR\n");
errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
nullptr, nullptr);
@@ -86,4 +55,13 @@ REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
"Unexpected error code returned from clEnqueueSignalSemaphores");
return TEST_PASS;
}
};
// Confirm that a signal without a semaphore payload list will return
// CL_INVALID_VALUE
REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
{
return MakeAndRunTest<DXFenceNegativeSignal>(device, context, queue,
num_elements);
}

View File

@@ -16,39 +16,19 @@
#include "semaphore_dx_fence_base.h"
// Confirm that the CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR property is in the
// properties returned by clGetSemaphoreInfo
REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
struct DXFenceQueryProperties final : DXFenceTestBase
{
int errcode = CL_SUCCESS;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clGetSemaphoreInfoKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
using DXFenceTestBase::DXFenceTestBase;
int Run() override
{
size_t properties_size_bytes = 0;
errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR, 0,
nullptr, &properties_size_bytes);
errcode = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
0, nullptr, &properties_size_bytes);
test_error(errcode, "Could not get semaphore info");
std::vector<cl_semaphore_properties_khr> semaphore_properties(
properties_size_bytes / sizeof(cl_semaphore_properties_khr));
errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
errcode = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
properties_size_bytes,
semaphore_properties.data(), nullptr);
test_error(errcode, "Could not get semaphore info");
@@ -58,12 +38,21 @@ REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR
&& semaphore_properties[i + 1]
== reinterpret_cast<cl_semaphore_properties_khr>(
semaphore.getHandle()))
fence_handle))
{
return TEST_PASS;
}
}
log_error(
"Failed to find the dx fence handle type in the semaphore properties");
log_error("Failed to find the dx fence handle type in the semaphore "
"properties");
return TEST_FAIL;
}
};
// Confirm that the CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR property is in the
// properties returned by clGetSemaphoreInfo
REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
{
return MakeAndRunTest<DXFenceQueryProperties>(device, context, queue,
num_elements);
}

View File

@@ -9,6 +9,7 @@ set(${MODULE_NAME}_SOURCES
test_semaphores_negative_create.cpp
test_semaphores_cross_queue.cpp
test_semaphores_queries.cpp
test_semaphores_payload.cpp
semaphore_base.h
)

View File

@@ -23,6 +23,7 @@
#include "harness/deviceInfo.h"
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/extensionHelpers.h"
struct SemaphoreBase
{
@@ -37,27 +38,15 @@ struct SemaphoreBase
test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
// If it is supported get the addresses of all the APIs here.
// clang-format off
#define GET_EXTENSION_ADDRESS(FUNC) \
FUNC = reinterpret_cast<FUNC##_fn>( \
clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \
if (FUNC == nullptr) \
{ \
log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \
" with " #FUNC "\n"); \
return TEST_FAIL; \
}
// clang-format on
GET_FUNCTION_EXTENSION_ADDRESS(device,
clCreateSemaphoreWithPropertiesKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueSignalSemaphoresKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueWaitSemaphoresKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clReleaseSemaphoreKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreInfoKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clRetainSemaphoreKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreHandleForTypeKHR);
GET_EXTENSION_ADDRESS(clCreateSemaphoreWithPropertiesKHR);
GET_EXTENSION_ADDRESS(clEnqueueSignalSemaphoresKHR);
GET_EXTENSION_ADDRESS(clEnqueueWaitSemaphoresKHR);
GET_EXTENSION_ADDRESS(clReleaseSemaphoreKHR);
GET_EXTENSION_ADDRESS(clGetSemaphoreInfoKHR);
GET_EXTENSION_ADDRESS(clRetainSemaphoreKHR);
GET_EXTENSION_ADDRESS(clGetSemaphoreHandleForTypeKHR);
#undef GET_EXTENSION_ADDRESS
return CL_SUCCESS;
}

View File

@@ -14,14 +14,8 @@
// limitations under the License.
//
#include <thread>
#include "semaphore_base.h"
#include "semaphore_base.h"
#define FLUSH_DELAY_S 5
namespace {
const char* source = "__kernel void empty() {}";

View File

@@ -0,0 +1,86 @@
//
// Copyright (c) 2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "semaphore_base.h"
namespace {
struct PayloadSemaphore : public SemaphoreTestBase
{
clSemaphoreWrapper sema_sec;
PayloadSemaphore(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int nelems)
: SemaphoreTestBase(device, context, queue, nelems), sema_sec(this)
{}
cl_int Run() override
{
cl_int err = CL_SUCCESS;
// Create semaphore
cl_semaphore_properties_khr sema_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
0
};
semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
test_error(err, "Could not create semaphore");
sema_sec =
clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
test_error(err, "Could not create semaphore");
{
cl_semaphore_payload_khr payload_list[] = { 1, 2 };
cl_semaphore_khr semaphores[2] = { semaphore, sema_sec };
// Signal semaphore
err = clEnqueueSignalSemaphoresKHR(
queue, 2, semaphores, payload_list, 0, nullptr, nullptr);
test_error(err, "Could not signal semaphore");
}
{
cl_semaphore_payload_khr payload_list[] = { 3, 4 };
cl_semaphore_khr semaphores[2] = { semaphore, sema_sec };
// Wait semaphore
err = clEnqueueWaitSemaphoresKHR(queue, 2, semaphores, payload_list,
0, nullptr, nullptr);
test_error(err, "Could not wait semaphore");
}
// Finish
err = clFinish(queue);
test_error(err, "Could not finish queue");
return CL_SUCCESS;
}
};
} // anonymous namespace
// Confirm that a valid semaphore payload values list will be ignored if no
// semaphores in the list of sema_objects require a payload
REGISTER_TEST_VERSION(semaphores_payload, Version(1, 2))
{
return MakeAndRunTest<PayloadSemaphore>(device, context, queue,
num_elements);
}

View File

@@ -336,9 +336,8 @@ int test_cl_image_write(cl_context context, cl_command_queue queue,
get_explicit_type_name(*outType), suffix, convert);
programPtr = kernelSource;
if (create_single_kernel_helper_with_build_options(
context, &program, &kernel, 1, (const char **)&programPtr,
"sample_test", ""))
if (create_single_kernel_helper(context, &program, &kernel, 1,
(const char **)&programPtr, "sample_test"))
{
return -1;
}

View File

@@ -629,7 +629,7 @@ int image_from_small_buffer_negative(cl_device_id device, cl_context context,
clCreateImage(context, flag, &format, &image_desc, nullptr,
&err);
test_failure_error(err, CL_INVALID_MEM_OBJECT,
test_failure_error(err, CL_INVALID_IMAGE_SIZE,
"Unexpected clCreateImage return");
err = clReleaseMemObject(buffer);

View File

@@ -859,7 +859,9 @@ double reference_add(double x, double y)
__m128 vb = _mm_set_ss((float)b);
va = _mm_add_ss(va, vb);
_mm_store_ss((float *)&a, va);
#elif defined(__PPC__)
#elif defined(__PPC__) || defined(__riscv)
// RISC-V CPUs with default 'f' fp32 extension do not support any way to
// enable/disable FTZ mode, subnormals are always handled without flushing.
// Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
// denorm's to zero. As such, the reference add with FTZ must be emulated in
// sw.
@@ -876,7 +878,7 @@ double reference_add(double x, double y)
} ub;
ub.d = b;
cl_uint mantA, mantB;
cl_ulong addendA, addendB, sum;
cl_ulong addendA, addendB;
int expA = extractf(a, &mantA);
int expB = extractf(b, &mantB);
cl_uint signA = ua.u & 0x80000000U;
@@ -972,7 +974,7 @@ double reference_multiply(double x, double y)
__m128 vb = _mm_set_ss((float)b);
va = _mm_mul_ss(va, vb);
_mm_store_ss((float *)&a, va);
#elif defined(__PPC__)
#elif defined(__PPC__) || defined(__riscv)
// Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
// denorm's to zero. As such, the reference multiply with FTZ must be
// emulated in sw.
@@ -3351,7 +3353,7 @@ long double reference_cbrtl(long double x)
long double reference_rintl(long double x)
{
#if defined(__PPC__)
#if defined(__PPC__) || defined(__riscv)
// On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
// mantissa can represent more than LDBL_MANT_DIG binary digits.
x = rintl(x);

View File

@@ -557,8 +557,9 @@ int TestNonUniformWorkGroup::prepareDevice () {
if (_testRange & Range::BARRIERS)
buildOptions += " -D TESTBARRIERS";
err = create_single_kernel_helper_with_build_options (_context, &_program, &_testKernel, 1,
&KERNEL_FUNCTION, "testKernel", buildOptions.c_str());
err = create_single_kernel_helper(_context, &_program, &_testKernel, 1,
&KERNEL_FUNCTION, "testKernel",
buildOptions.c_str());
if (err)
{
log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__);
@@ -842,8 +843,9 @@ int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
if (testRange & Range::BARRIERS)
buildOptions += " -D TESTBARRIERS";
err = create_single_kernel_helper_with_build_options (_context, &program, &testKernel, 1,
&KERNEL_FUNCTION, "testKernel", buildOptions.c_str());
err = create_single_kernel_helper(_context, &program, &testKernel, 1,
&KERNEL_FUNCTION, "testKernel",
buildOptions.c_str());
if (err)
{
log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__);

View File

@@ -63,8 +63,8 @@ REGISTER_TEST(pipe_info)
log_info( " CL_PIPE_MAX_PACKETS passed.\n" );
}
err = create_single_kernel_helper_with_build_options(
context, &program, &kernel, 1, &pipe_kernel_code, "pipe_kernel",
err = create_single_kernel_helper(context, &program, &kernel, 1,
&pipe_kernel_code, "pipe_kernel",
"-cl-std=CL2.0 -cl-kernel-arg-info");
test_error_fail(err, "Error creating program");

View File

@@ -70,14 +70,15 @@ struct printDataGenParameters
{
std::vector<std::string> genericFormats;
const char* dataRepresentation;
const char* vectorFormatFlag;
const char* vectorFormatSpecifier;
const char* dataType;
const char* vectorSize;
const char* addrSpaceArgumentTypeQualifier;
const char* addrSpaceVariableTypeQualifier;
const char* addrSpaceParameter;
const char* addrSpacePAdd;
const char* vectorFormatFlag = nullptr;
const char* vectorFormatSpecifier = nullptr;
const char* dataType = nullptr;
const char* vectorSize = nullptr;
const char* addrSpaceArgumentTypeQualifier = nullptr;
const char* addrSpaceVariableTypeQualifier = nullptr;
const char* addrSpaceParameter = nullptr;
const char* addrSpacePAdd = nullptr;
bool allowFallbackTest = false;
};
// Reference results - filled out at run-time
@@ -111,6 +112,9 @@ struct testCase
char*,
const size_t); //function pointer for generating reference results
Type dataType; //the data type that will be printed during reference result generation (used for setting rounding mode)
bool (*fallbackTestFN)(const char*,
const char*) =
nullptr; // function pointer to perform fallback test if required
};
extern const char* strType[];

View File

@@ -26,8 +26,11 @@ static void intRefBuilder(printDataGenParameters&, char*, const size_t);
static void halfRefBuilder(printDataGenParameters&, char* rResult,
const size_t);
static void floatRefBuilder(printDataGenParameters&, char* rResult, const size_t);
static bool floatRefTest(const char* refResult, const char* analysisBuffer);
static void doubleRefBuilder(printDataGenParameters&, char* rResult,
const size_t);
static bool doubleRefTest(const char* refResult, const char* analysisBuffer);
static void octalRefBuilder(printDataGenParameters&, char*, const size_t);
static void unsignedRefBuilder(printDataGenParameters&, char*, const size_t);
static void hexRefBuilder(printDataGenParameters&, char*, const size_t);
@@ -468,12 +471,12 @@ std::vector<printDataGenParameters> printFloatGenParameters = {
// Double argument representing floating-point,in [-]xh.hhhhpAd style
{ { "%.6a" }, "0.1f" },
{ { "%.6a" }, "0.5f", 0, 0, 0, 0, 0, 0, 0, 0, true },
//(Minimum)Ten-wide,Double argument representing floating-point,in
// xh.hhhhpAd style,default(right)-justified
{ { "%10.2a" }, "9990.235f" },
{ { "%10.2a" }, "1.5f", 0, 0, 0, 0, 0, 0, 0, 0, true },
//(Minimum)Ten-wide,two positions after the decimal,with
// a blank space inserted before the value, default(right)-justified
@@ -502,8 +505,9 @@ testCase testCaseFloat = {
floatRefBuilder,
kfloat
kfloat,
floatRefTest
};
//==============================================
@@ -673,12 +677,12 @@ std::vector<printDataGenParameters> printDoubleGenParameters = {
// Double argument representing floating-point,in [-]xh.hhhhpAd style
{ { "%.6a" }, "0.1" },
{ { "%.6a" }, "0.5", 0, 0, 0, 0, 0, 0, 0, 0, true },
//(Minimum)Ten-wide,Double argument representing floating-point,in
// xh.hhhhpAd style,default(right)-justified
{ { "%10.2a" }, "9990.235" },
{ { "%10.2a" }, "1.5", 0, 0, 0, 0, 0, 0, 0, 0, true },
};
//---------------------------------------------------------
@@ -697,8 +701,9 @@ testCase testCaseDouble = {
doubleRefBuilder,
kdouble
kdouble,
doubleRefTest
};
//==============================================
@@ -1032,6 +1037,9 @@ testCase testCaseChar = {
std::vector<printDataGenParameters> printStringGenParameters = {
// empty format, no data representation
{ {""} },
// empty format
{ {""}, "\"foo\"" },
@@ -1094,6 +1102,8 @@ std::vector<std::string> correctBufferString = {
"",
"",
" foo",
"f",
@@ -1752,7 +1762,15 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
return !std::regex_match(analysisBuffer, nanRegex);
}
return strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str());
size_t ret =
strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str());
if (ret != 0 && pTestCase->_genParameters[testId].allowFallbackTest
&& pTestCase->fallbackTestFN)
if (pTestCase->fallbackTestFN(
analysisBuffer, pTestCase->_correctBuffer[testId].c_str()))
return 0;
return ret;
}
static void intRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
@@ -1776,6 +1794,13 @@ static void floatRefBuilder(printDataGenParameters& params, char* refResult, con
strtof(params.dataRepresentation, NULL));
}
static bool floatRefTest(const char* refResult, const char* analysisBuffer)
{
float test = strtof(analysisBuffer, NULL);
float expected = strtof(refResult, NULL);
return test == expected;
}
static void doubleRefBuilder(printDataGenParameters& params, char* refResult,
const size_t refSize)
{
@@ -1783,6 +1808,13 @@ static void doubleRefBuilder(printDataGenParameters& params, char* refResult,
strtod(params.dataRepresentation, NULL));
}
static bool doubleRefTest(const char* refResult, const char* analysisBuffer)
{
double test = strtod(analysisBuffer, NULL);
double expected = strtod(refResult, NULL);
return test == expected;
}
static void octalRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
{
const unsigned long int data = strtoul(params.dataRepresentation, NULL, 10);

View File

@@ -18,7 +18,7 @@
const char *kernelCode = "__kernel void kernel_empty(){}";
REGISTER_TEST(profiling_timebase)
REGISTER_TEST_VERSION(profiling_timebase, Version(2, 1))
{
Version version = get_device_cl_version(device);
cl_platform_id platform = getPlatformFromDevice(device);

View File

@@ -341,9 +341,9 @@ int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState,
{
if ((targetArr[i]) % minAlign != (cl_uint)0)
{
vlog_error(
"Error %zu (of %zu). Expected a multiple of %zx, got %x\n", i,
pClState->m_numThreads, minAlign, targetArr[i]);
vlog_error("Error in work-item %zu (of %zu). Expected a multiple "
"of 0x%zx, got 0x%x\n",
i, pClState->m_numThreads, minAlign, targetArr[i]);
return -1;
}
}
@@ -371,7 +371,8 @@ int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState,
{
if (targetArr[i] != targetSize)
{
vlog_error("Error %zu (of %zu). Expected %d, got %d\n", i,
vlog_error(
"Error in work-item %zu (of %zu). Expected %d, got %d\n", i,
pClState->m_numThreads, targetSize, targetArr[i]);
return -1;
}
@@ -390,10 +391,11 @@ int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState,
{
if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0)
{
vlog_error(
"Error %zu (of %zu). Expected %zu more than a multiple of "
vlog_error("Error in work-item %zu (of %zu). Expected %zu more "
"than a multiple of "
"%zu, got %d \n",
i, pClState->m_numThreads, beforeSize, totSize, targetArr[i]);
i, pClState->m_numThreads, beforeSize, totSize,
targetArr[i]);
return -1;
}
}

View File

@@ -3,7 +3,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#define MAX_BUFFERS 5
layout(constant_id = 0) const uint MAX_BUFFERS = 5;
layout(binding = 0) buffer Params
{

View File

@@ -26,6 +26,7 @@
#endif
#include <assert.h>
#include <memory>
#include <vector>
#include <iostream>
#include <string.h>
@@ -79,9 +80,9 @@ struct ConsistencyExternalBufferTest : public VulkanTestBase
VulkanBufferList vkBufferList(1, *vkDevice, bufferSize,
vkExternalMemoryHandleType);
VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(
std::unique_ptr<VulkanDeviceMemory> vkDeviceMem(new VulkanDeviceMemory(
*vkDevice, vkBufferList[0], memoryTypeList[0],
vkExternalMemoryHandleType);
vkExternalMemoryHandleType));
vkDeviceMem->bindBuffer(vkBufferList[0], 0);

View File

@@ -128,12 +128,24 @@ int run_test_with_two_queue(
vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule);
VkSpecializationMapEntry entry;
entry.constantID = 0;
entry.offset = 0;
entry.size = sizeof(uint32_t);
VkSpecializationInfo spec;
spec.mapEntryCount = 1;
spec.pMapEntries = &entry;
spec.dataSize = sizeof(uint32_t);
spec.pData = &numBuffers;
VulkanComputePipeline vkComputePipeline(
vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList);
@@ -461,12 +473,24 @@ int run_test_with_one_queue(
vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule);
VkSpecializationMapEntry entry;
entry.constantID = 0;
entry.offset = 0;
entry.size = sizeof(uint32_t);
VkSpecializationInfo spec;
spec.mapEntryCount = 1;
spec.pMapEntries = &entry;
spec.dataSize = sizeof(uint32_t);
spec.pData = &numBuffers;
VulkanComputePipeline vkComputePipeline(
vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList);
@@ -764,12 +788,24 @@ int run_test_with_multi_import_same_ctx(
vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule);
VkSpecializationMapEntry entry;
entry.constantID = 0;
entry.offset = 0;
entry.size = sizeof(uint32_t);
VkSpecializationInfo spec;
spec.mapEntryCount = 1;
spec.pMapEntries = &entry;
spec.dataSize = sizeof(uint32_t);
spec.pData = &numBuffers;
VulkanComputePipeline vkComputePipeline(
vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList);
@@ -1103,12 +1139,24 @@ int run_test_with_multi_import_diff_ctx(
vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule);
VkSpecializationMapEntry entry;
entry.constantID = 0;
entry.offset = 0;
entry.size = sizeof(uint32_t);
VkSpecializationInfo spec;
spec.mapEntryCount = 1;
spec.pMapEntries = &entry;
spec.dataSize = sizeof(uint32_t);
spec.pData = &numBuffers;
VulkanComputePipeline vkComputePipeline(
vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList);
@@ -1586,7 +1634,7 @@ struct BufferTestBase : public VulkanTestBase
{
BufferTestBase(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int nelems)
: VulkanTestBase(device, context, queue, nelems)
: VulkanTestBase(device, context, queue, nelems, true)
{}
int test_buffer_common(bool use_fence)

View File

@@ -37,11 +37,13 @@ inline void params_reset()
struct VulkanTestBase
{
VulkanTestBase(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int nelems)
cl_command_queue queue, cl_int nelems,
bool useShaderInt8 = false)
: device(device), context(context), num_elems(nelems)
{
vkDevice.reset(new VulkanDevice(
getAssociatedVulkanPhysicalDevice(device, useValidationLayers)));
getAssociatedVulkanPhysicalDevice(device, useValidationLayers),
getDefaultVulkanQueueFamilyToQueueCountMap(), useShaderInt8));
cl_platform_id platform;
cl_int error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,