Merge branch 'main' into cl_khr_unified_svm

This commit is contained in:
Ben Ashbaugh
2026-03-25 12:05:00 -07:00
90 changed files with 5595 additions and 1710 deletions

View File

@@ -28,7 +28,7 @@ jobs:
arch: android-aarch64 arch: android-aarch64
android_arch_abi: arm64-v8a android_arch_abi: arm64-v8a
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v6
- name: Setup Ninja - name: Setup Ninja
uses: seanmiddleditch/gha-setup-ninja@master uses: seanmiddleditch/gha-setup-ninja@master
- name: Install Arm and AArch64 compilers - name: Install Arm and AArch64 compilers
@@ -184,7 +184,7 @@ jobs:
steps: steps:
- name: Install packages - name: Install packages
run: sudo apt install -y clang-format clang-format-14 run: sudo apt install -y clang-format clang-format-14
- uses: actions/checkout@v5 - uses: actions/checkout@v6
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Check code format - name: Check code format

View File

@@ -81,14 +81,17 @@ include(CheckFunctionExists)
include(CheckIncludeFiles) include(CheckIncludeFiles)
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)") if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" OR
set(CLConform_TARGET_ARCH ARM) CMAKE_VS_PLATFORM_NAME MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
set(CLConform_TARGET_ARCH ARM64) set(CLConform_TARGET_ARCH ARM64)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(CLConform_TARGET_ARCH ARM)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(CLConform_TARGET_ARCH x86_64) set(CLConform_TARGET_ARCH x86_64)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*")
set(CLConform_TARGET_ARCH x86) set(CLConform_TARGET_ARCH x86)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv.*")
set(CLConform_TARGET_ARCH RISCV)
endif() endif()
if(NOT DEFINED CLConform_TARGET_ARCH) if(NOT DEFINED CLConform_TARGET_ARCH)
@@ -139,8 +142,12 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
add_cxx_flag_if_supported(-frounding-math) add_cxx_flag_if_supported(-frounding-math)
endif() endif()
else() else()
# Curb the inclusion of SSE headers when compiling for non x86 targets
if(${CLConform_TARGET_ARCH} STREQUAL "x86_64" OR ${CLConform_TARGET_ARCH}
STREQUAL "x86")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
endif()
endif() endif()
# Set a module's COMPILE_FLAGS if using gcc or clang. # Set a module's COMPILE_FLAGS if using gcc or clang.

View File

@@ -50,7 +50,7 @@ std::string get_device_info_string(cl_device_id device,
} }
/* Determines if an extension is supported by a device. */ /* Determines if an extension is supported by a device. */
int is_extension_available(cl_device_id device, const char *extensionName) bool is_extension_available(cl_device_id device, const char *extensionName)
{ {
std::string extString = get_device_extensions_string(device); std::string extString = get_device_extensions_string(device);
std::istringstream ss(extString); std::istringstream ss(extString);

View File

@@ -26,7 +26,7 @@ std::string get_device_info_string(cl_device_id device,
cl_device_info param_name); cl_device_info param_name);
/* Determines if an extension is supported by a device. */ /* Determines if an extension is supported by a device. */
int is_extension_available(cl_device_id device, const char *extensionName); bool is_extension_available(cl_device_id device, const char *extensionName);
/* Returns the version of the extension the device supports or throws an /* Returns the version of the extension the device supports or throws an
* exception if the extension is not supported by the device. */ * exception if the extension is not supported by the device. */

View File

@@ -120,6 +120,12 @@ static int vlog_win32(const char *format, ...);
return retValue; \ return retValue; \
} \ } \
} }
#define test_object_failure_ret(object, errCode, expectedErrCode, msg, \
retValue) \
{ \
test_assert_error_ret(object == nullptr, msg, retValue); \
test_failure_error_ret(errCode, expectedErrCode, msg, retValue); \
}
#define print_failure_error(errCode, expectedErrCode, msg) \ #define print_failure_error(errCode, expectedErrCode, msg) \
log_error("ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, \ log_error("ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, \
IGetErrorString(errCode), IGetErrorString(expectedErrCode), \ IGetErrorString(errCode), IGetErrorString(expectedErrCode), \

View File

@@ -32,5 +32,15 @@
} \ } \
} while (false) } while (false)
#define GET_FUNCTION_EXTENSION_ADDRESS(device, FUNC) \
FUNC = \
reinterpret_cast<FUNC##_fn>(clGetExtensionFunctionAddressForPlatform( \
getPlatformFromDevice(device), #FUNC)); \
if (FUNC == nullptr) \
{ \
log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \
" with " #FUNC "\n"); \
return TEST_FAIL; \
}
#endif // _extensionHelpers_h #endif // _extensionHelpers_h

View File

@@ -45,6 +45,9 @@ typedef int64_t FPU_mode_type;
#elif defined(__PPC__) #elif defined(__PPC__)
#include <fpu_control.h> #include <fpu_control.h>
extern __thread fpu_control_t fpu_control; extern __thread fpu_control_t fpu_control;
#elif defined(__riscv)
#define _FPU_MASK_NI 1
static FPU_mode_type fpu_control;
#elif defined(__mips__) #elif defined(__mips__)
#include "mips/m32c1.h" #include "mips/m32c1.h"
#endif #endif
@@ -56,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *oldMode)
|| defined(_M_X64) || defined(__MINGW32__) || defined(_M_X64) || defined(__MINGW32__)
*oldMode = _mm_getcsr(); *oldMode = _mm_getcsr();
_mm_setcsr(*oldMode | 0x8040); _mm_setcsr(*oldMode | 0x8040);
#elif defined(__PPC__) #elif defined(__PPC__) || defined(__riscv)
*oldMode = fpu_control; *oldMode = fpu_control;
fpu_control |= _FPU_MASK_NI; fpu_control |= _FPU_MASK_NI;
#elif defined(__arm__) #elif defined(__arm__)
@@ -89,8 +92,8 @@ inline void DisableFTZ(FPU_mode_type *oldMode)
|| defined(_M_X64) || defined(__MINGW32__) || defined(_M_X64) || defined(__MINGW32__)
*oldMode = _mm_getcsr(); *oldMode = _mm_getcsr();
_mm_setcsr(*oldMode & ~0x8040); _mm_setcsr(*oldMode & ~0x8040);
#elif defined(__PPC__) #elif defined(__PPC__) || defined(__riscv)
*mode = fpu_control; *oldMode = fpu_control;
fpu_control &= ~_FPU_MASK_NI; fpu_control &= ~_FPU_MASK_NI;
#elif defined(__arm__) #elif defined(__arm__)
unsigned fpscr; unsigned fpscr;
@@ -121,7 +124,7 @@ inline void RestoreFPState(FPU_mode_type *mode)
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|| defined(_M_X64) || defined(__MINGW32__) || defined(_M_X64) || defined(__MINGW32__)
_mm_setcsr(*mode); _mm_setcsr(*mode);
#elif defined(__PPC__) #elif defined(__PPC__) || defined(__riscv)
fpu_control = *mode; fpu_control = *mode;
#elif defined(__arm__) #elif defined(__arm__)
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode)); __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));

View File

@@ -141,7 +141,6 @@ std::string get_kernel_name(const std::string &source)
{ {
kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1); kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
kernelsList[kernelsList.size() - 1] = '.'; kernelsList[kernelsList.size() - 1] = '.';
kernelsList[kernelsList.size() - 1] = '.';
} }
oss << kernelsList; oss << kernelsList;
} }
@@ -678,17 +677,18 @@ static int create_single_kernel_helper_create_program_offline(
return CL_SUCCESS; return CL_SUCCESS;
} }
static int create_single_kernel_helper_create_program( int create_single_kernel_helper_create_program(cl_context context,
cl_context context, cl_device_id device, cl_program *outProgram, cl_program *outProgram,
unsigned int numKernelLines, const char **kernelProgram, unsigned int numKernelLines,
const char *buildOptions, CompilationMode compilationMode) const char **kernelProgram,
const char *buildOptions)
{ {
std::lock_guard<std::mutex> compiler_lock(gCompilerMutex); std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
std::string filePrefix = std::string filePrefix =
get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions); get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
bool shouldSaveToDisk = should_save_kernel_source_to_disk( bool shouldSaveToDisk = should_save_kernel_source_to_disk(
compilationMode, gCompilationCacheMode, gCompilationCachePath, gCompilationMode, gCompilationCacheMode, gCompilationCachePath,
filePrefix); filePrefix);
if (shouldSaveToDisk) if (shouldSaveToDisk)
@@ -701,7 +701,7 @@ static int create_single_kernel_helper_create_program(
return -1; return -1;
} }
} }
if (compilationMode == kOnline) if (gCompilationMode == kOnline)
{ {
int error = CL_SUCCESS; int error = CL_SUCCESS;
@@ -718,40 +718,9 @@ static int create_single_kernel_helper_create_program(
else else
{ {
return create_single_kernel_helper_create_program_offline( return create_single_kernel_helper_create_program_offline(
context, device, outProgram, numKernelLines, kernelProgram, context, nullptr, outProgram, numKernelLines, kernelProgram,
buildOptions, compilationMode);
}
}
int create_single_kernel_helper_create_program(cl_context context,
cl_program *outProgram,
unsigned int numKernelLines,
const char **kernelProgram,
const char *buildOptions)
{
return create_single_kernel_helper_create_program(
context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
gCompilationMode);
}
int create_single_kernel_helper_create_program_for_device(
cl_context context, cl_device_id device, cl_program *outProgram,
unsigned int numKernelLines, const char **kernelProgram,
const char *buildOptions)
{
return create_single_kernel_helper_create_program(
context, device, outProgram, numKernelLines, kernelProgram,
buildOptions, gCompilationMode); buildOptions, gCompilationMode);
} }
int create_single_kernel_helper_with_build_options(
cl_context context, cl_program *outProgram, cl_kernel *outKernel,
unsigned int numKernelLines, const char **kernelProgram,
const char *kernelName, const char *buildOptions)
{
return create_single_kernel_helper(context, outProgram, outKernel,
numKernelLines, kernelProgram,
kernelName, buildOptions);
} }
// Creates and builds OpenCL C/C++ program, and creates a kernel // Creates and builds OpenCL C/C++ program, and creates a kernel
@@ -1239,8 +1208,8 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags,
{ {
cl_image_format *list; cl_image_format *list;
cl_uint count = 0; cl_uint count = 0;
cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128, cl_int err =
NULL, &count); clGetSupportedImageFormats(context, flags, image_type, 0, NULL, &count);
if (count == 0) return 0; if (count == 0) return 0;
list = (cl_image_format *)malloc(count * sizeof(cl_image_format)); list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
@@ -1276,7 +1245,6 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags,
return (i < count) ? 1 : 0; return (i < count) ? 1 : 0;
} }
size_t get_pixel_bytes(const cl_image_format *fmt);
size_t get_pixel_bytes(const cl_image_format *fmt) size_t get_pixel_bytes(const cl_image_format *fmt)
{ {
size_t chanCount; size_t chanCount;

View File

@@ -58,6 +58,13 @@ void helpInfo()
with a very small subset of the tests. This option should not be used with a very small subset of the tests. This option should not be used
for conformance submission (default: disabled). for conformance submission (default: disabled).
--invalid-object-scenarios=<option_1>,<option_2>....
Specify different scenarios to use when
testing for object validity. Options can be:
nullptr To use a nullptr (default)
valid_object_wrong_type To use a valid_object which is not the correct type
NOTE: valid_object_wrong_type option is not required for OpenCL conformance.
For offline compilation (binary and spir-v modes) only: For offline compilation (binary and spir-v modes) only:
--compilation-cache-mode <cache-mode> --compilation-cache-mode <cache-mode>
Specify a compilation caching mode: Specify a compilation caching mode:
@@ -104,6 +111,7 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
} }
delArg = 0; delArg = 0;
size_t i_object_length = strlen("--invalid-object-scenarios=");
if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
{ {
@@ -264,6 +272,32 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
return -1; return -1;
} }
} }
else if (!strncmp(argv[i],
"--invalid-object-scenarios=", i_object_length))
{
if (strlen(argv[i]) > i_object_length)
{
delArg++;
gInvalidObject = 0;
std::string invalid_objects(argv[i]);
if (invalid_objects.find("nullptr") != std::string::npos)
{
gInvalidObject |= InvalidObject::Nullptr;
}
if (invalid_objects.find("valid_object_wrong_type")
!= std::string::npos)
{
gInvalidObject |= InvalidObject::ValidObjectWrongType;
}
}
else
{
log_error("Program argument for --invalid-object-scenarios was "
"not specified.\n");
return -1;
}
}
// cleaning parameters from argv tab // cleaning parameters from argv tab
for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg]; for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg];

View File

@@ -201,6 +201,7 @@ RoundingMode get_round(void)
#elif defined(__mips__) #elif defined(__mips__)
#include "mips/m32c1.h" #include "mips/m32c1.h"
#endif #endif
void *FlushToZero(void) void *FlushToZero(void)
{ {
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
@@ -231,6 +232,8 @@ void *FlushToZero(void)
#elif defined(__mips__) #elif defined(__mips__)
fpa_bissr(FPA_CSR_FS); fpa_bissr(FPA_CSR_FS);
return NULL; return NULL;
#elif defined(__riscv)
return NULL;
#else #else
#error Unknown arch #error Unknown arch
#endif #endif
@@ -266,6 +269,8 @@ void UnFlushToZero(void *p)
_FPU_SETCW(flags); _FPU_SETCW(flags);
#elif defined(__mips__) #elif defined(__mips__)
fpa_bicsr(FPA_CSR_FS); fpa_bicsr(FPA_CSR_FS);
#elif defined(__riscv)
return;
#else #else
#error Unknown arch #error Unknown arch
#endif #endif

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "testHarness.h" #include "testHarness.h"
#include "stringHelpers.h"
#include "compat.h" #include "compat.h"
#include <algorithm> #include <algorithm>
#include <stdio.h> #include <stdio.h>
@@ -21,6 +22,7 @@
#include <string.h> #include <string.h>
#include <cassert> #include <cassert>
#include <deque> #include <deque>
#include <filesystem>
#include <mutex> #include <mutex>
#include <set> #include <set>
#include <stdexcept> #include <stdexcept>
@@ -33,6 +35,8 @@
#include "imageHelpers.h" #include "imageHelpers.h"
#include "parseParameters.h" #include "parseParameters.h"
namespace fs = std::filesystem;
#if !defined(_WIN32) #if !defined(_WIN32)
#include <sys/utsname.h> #include <sys/utsname.h>
#include <unistd.h> #include <unistd.h>
@@ -60,6 +64,7 @@ int gInfNanSupport = 1;
int gIsEmbedded = 0; int gIsEmbedded = 0;
int gHasLong = 1; int gHasLong = 1;
bool gCoreILProgram = true; bool gCoreILProgram = true;
int gInvalidObject = InvalidObject::Nullptr;
#define DEFAULT_NUM_ELEMENTS 0x4000 #define DEFAULT_NUM_ELEMENTS 0x4000
@@ -94,11 +99,25 @@ static int saveResultsToJson(const char *suiteName, test_definition testList[],
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
FILE *file = fopen(fileName, "w"); fs::path file_path(fileName);
// When running under Bazel test, prepend the Bazel output directory to
// the provided path
if (nullptr != getenv("BAZEL_TEST"))
{
char *bazel_output_dir = getenv("TEST_UNDECLARED_OUTPUTS_DIR");
if (nullptr != bazel_output_dir)
{
file_path = fs::path(bazel_output_dir) / file_path;
}
}
auto file_path_str = to_string(file_path.u8string());
FILE *file = fopen(file_path_str.c_str(), "w");
if (NULL == file) if (NULL == file)
{ {
log_error("ERROR: Failed to open '%s' for writing results.\n", log_error("ERROR: Failed to open '%s' for writing results.\n",
fileName); file_path_str.c_str());
return EXIT_FAILURE; return EXIT_FAILURE;
} }
@@ -127,7 +146,8 @@ static int saveResultsToJson(const char *suiteName, test_definition testList[],
int ret = fclose(file) ? EXIT_FAILURE : EXIT_SUCCESS; int ret = fclose(file) ? EXIT_FAILURE : EXIT_SUCCESS;
log_info("Saving results to %s: %s!\n", fileName, save_map[ret]); log_info("Saving results to %s: %s!\n", file_path_str.c_str(),
save_map[ret]);
return ret; return ret;
} }
@@ -308,6 +328,8 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
"CL_CONFORMANCE_RESULTS_FILENAME (currently '%s')\n", "CL_CONFORMANCE_RESULTS_FILENAME (currently '%s')\n",
fileName != NULL ? fileName : "<undefined>"); fileName != NULL ? fileName : "<undefined>");
log_info("\t to save results to JSON file.\n"); log_info("\t to save results to JSON file.\n");
log_info("\t When running in Bazel test this is relative to "
"$TEST_UNDECLARED_OUTPUTS_DIR.\n");
log_info("\n"); log_info("\n");
log_info("Test names:\n"); log_info("Test names:\n");
@@ -1408,6 +1430,8 @@ void PrintArch(void)
vlog("ARCH:\tWindows\n"); vlog("ARCH:\tWindows\n");
#elif defined(__mips__) #elif defined(__mips__)
vlog("ARCH:\tmips\n"); vlog("ARCH:\tmips\n");
#elif defined(__riscv)
vlog("ARCH:\tRISC-V\n");
#else #else
#error unknown arch #error unknown arch
#endif #endif

View File

@@ -22,6 +22,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <type_traits>
class Version { class Version {
public: public:
@@ -257,6 +258,37 @@ extern std::string get_platform_info_string(cl_platform_id platform,
cl_platform_info param_name); cl_platform_info param_name);
extern bool is_platform_extension_available(cl_platform_id platform, extern bool is_platform_extension_available(cl_platform_id platform,
const char *extensionName); const char *extensionName);
enum InvalidObject
{
Nullptr = 1 << 0,
ValidObjectWrongType = 1 << 1,
};
extern int gInvalidObject;
template <typename T> std::vector<T> get_invalid_objects(cl_device_id device)
{
std::vector<T> ret;
if ((gInvalidObject & InvalidObject::Nullptr)
&& !(std::is_same<T, cl_platform_id>::value))
{
ret.push_back(nullptr);
}
if (gInvalidObject & InvalidObject::ValidObjectWrongType)
{
if (std::is_same<T, cl_device_id>::value)
{
cl_platform_id platform = getPlatformFromDevice(device);
ret.push_back(reinterpret_cast<T>(platform));
}
else
{
ret.push_back(reinterpret_cast<T>(device));
}
}
return ret;
}
#if !defined(__APPLE__) #if !defined(__APPLE__)
void memset_pattern4(void *, const void *, size_t); void memset_pattern4(void *, const void *, size_t);

View File

@@ -6,7 +6,10 @@ set(${MODULE_NAME}_SOURCES
main.cpp main.cpp
negative_platform.cpp negative_platform.cpp
negative_queue.cpp negative_queue.cpp
negative_context.cpp
negative_enqueue_marker.cpp
negative_enqueue_map_image.cpp negative_enqueue_map_image.cpp
negative_device.cpp
test_api_consistency.cpp test_api_consistency.cpp
test_bool.cpp test_bool.cpp
test_retain.cpp test_retain.cpp

View File

@@ -0,0 +1,290 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
/* Negative Tests for clCreateContext */
REGISTER_TEST(negative_create_context)
{
cl_context_properties props[3] = {
CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(nullptr), 0
};
cl_int err = 0;
cl_context ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_PLATFORM,
"clCreateContext should return CL_INVALID_PLATFORM when:\"an invalid "
"platform object is used with the CL_CONTEXT_PLATFORM property\" using "
"a nullptr",
TEST_FAIL);
props[0] = reinterpret_cast<cl_context_properties>("INVALID_PROPERTY");
props[1] = reinterpret_cast<cl_context_properties>(nullptr);
ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_PROPERTY,
"clCreateContext should return CL_INVALID_PROPERTY when: \"context "
"property name in properties is not a supported property name\"",
TEST_FAIL);
if (get_device_cl_version(device) >= Version(1, 2))
{
cl_context_properties invalid_value{ -1 };
props[0] = CL_CONTEXT_INTEROP_USER_SYNC;
props[1] = invalid_value;
ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_PROPERTY,
"clCreateContext should return CL_INVALID_PROPERTY when: \"the "
"value specified for a supported property name is not valid\"",
TEST_FAIL);
cl_bool property_value = CL_FALSE;
cl_context_properties duplicated_property[5] = {
CL_CONTEXT_INTEROP_USER_SYNC,
static_cast<cl_context_properties>(property_value),
CL_CONTEXT_INTEROP_USER_SYNC,
static_cast<cl_context_properties>(property_value), 0
};
ctx = clCreateContext(duplicated_property, 1, &device, nullptr, nullptr,
&err);
test_object_failure_ret(
ctx, err, CL_INVALID_PROPERTY,
"clCreateContext should return CL_INVALID_PROPERTY when: \"the "
"same property name is specified more than once\"",
TEST_FAIL);
}
ctx = clCreateContext(nullptr, 1, nullptr, nullptr, nullptr, &err);
test_object_failure_ret(ctx, err, CL_INVALID_VALUE,
"clCreateContext should return CL_INVALID_VALUE "
"when: \"devices is NULL\"",
TEST_FAIL);
ctx = clCreateContext(nullptr, 0, &device, nullptr, nullptr, &err);
test_object_failure_ret(ctx, err, CL_INVALID_VALUE,
"clCreateContext should return CL_INVALID_VALUE "
"when: \"num_devices is equal to zero\"",
TEST_FAIL);
int user_data = 1; // Arbitrary non-NULL value
ctx = clCreateContext(nullptr, 1, &device, nullptr, &user_data, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_VALUE,
"clCreateContext should return CL_INVALID_VALUE when: \"pfn_notify is "
"NULL but user_data is not NULL\"",
TEST_FAIL);
cl_device_id invalid_device = nullptr;
ctx = clCreateContext(nullptr, 1, &invalid_device, nullptr, nullptr, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_DEVICE,
"clCreateContext should return CL_INVALID_DEVICE when: \"any device in "
"devices is not a valid device\" using a device set to nullptr",
TEST_FAIL);
return TEST_PASS;
}
/* Negative Tests for clCreateContextFromType */
REGISTER_TEST(negative_create_context_from_type)
{
cl_platform_id platform = getPlatformFromDevice(device);
cl_context_properties props[5] = {
CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(nullptr),
0, 0, 0
};
cl_int err = 0;
cl_context ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT,
nullptr, nullptr, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_PLATFORM,
"clCreateContextFromType should return CL_INVALID_PLATFORM when: \"an "
"invalid platform object is used with the CL_CONTEXT_PLATFORM "
"property\" using a nullptr",
TEST_FAIL);
ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
nullptr, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_PLATFORM,
"clCreateContextFromType should return CL_INVALID_PLATFORM when: \"an "
"invalid platform object is used with the CL_CONTEXT_PLATFORM "
"property\" using a valid object that is NOT a platform",
TEST_FAIL);
props[1] = reinterpret_cast<cl_context_properties>(platform);
props[2] = reinterpret_cast<cl_context_properties>("INVALID_PROPERTY");
props[3] = reinterpret_cast<cl_context_properties>(nullptr);
ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
nullptr, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_PROPERTY,
"clCreateContextFromType should return CL_INVALID_PROPERTY when: "
"\"context property name in properties is not a supported property "
"name\"",
TEST_FAIL);
if (get_device_cl_version(device) >= Version(1, 2))
{
cl_context_properties invalid_value{ -1 };
props[2] = CL_CONTEXT_INTEROP_USER_SYNC;
props[3] = invalid_value;
ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
nullptr, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_PROPERTY,
"clCreateContextFromType should return CL_INVALID_PROPERTY when: "
"\"the value specified for a supported property name is not "
"valid\"",
TEST_FAIL);
props[2] = CL_CONTEXT_PLATFORM;
props[3] = reinterpret_cast<cl_context_properties>(platform);
ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
nullptr, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_PROPERTY,
"clCreateContextFromType should return CL_INVALID_PROPERTY when: "
"\"the same property name is specified more than once\"",
TEST_FAIL);
}
int user_data = 1; // Arbitrary non-NULL value
ctx = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_DEFAULT, nullptr,
&user_data, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_VALUE,
"clCreateContextFromType should return CL_INVALID_VALUE when: "
"\"pfn_notify is NULL but user_data is not NULL\"",
TEST_FAIL);
cl_device_type INVALID_DEVICE_TYPE = 0;
ctx = clCreateContextFromType(nullptr, INVALID_DEVICE_TYPE, nullptr,
nullptr, &err);
test_object_failure_ret(
ctx, err, CL_INVALID_DEVICE_TYPE,
"clCreateContextFromType should return CL_INVALID_DEVICE_TYPE when: "
"\"device_type is not a valid value\"",
TEST_FAIL);
std::vector<cl_device_type> device_types = { CL_DEVICE_TYPE_CPU,
CL_DEVICE_TYPE_GPU,
CL_DEVICE_TYPE_ACCELERATOR };
if (get_device_cl_version(device) >= Version(1, 2))
{
device_types.push_back(CL_DEVICE_TYPE_CUSTOM);
}
for (auto type : device_types)
{
clContextWrapper tmp_context =
clCreateContextFromType(nullptr, type, nullptr, nullptr, &err);
if (err != CL_SUCCESS)
{
test_object_failure_ret(
tmp_context, err, CL_DEVICE_NOT_FOUND,
"clCreateContextFromType should return CL_DEVICE_NOT_FOUND "
"when: \"no devices that match device_type and property values "
"specified in properties are currently available\"",
TEST_FAIL);
break;
}
}
return TEST_PASS;
}
/* Negative Tests for clRetainContext */
REGISTER_TEST(negative_retain_context)
{
cl_int err = clRetainContext(nullptr);
test_failure_error_ret(
err, CL_INVALID_CONTEXT,
"clRetainContext should return CL_INVALID_CONTEXT when: \"context is "
"not a valid OpenCL context\" using a nullptr",
TEST_FAIL);
return TEST_PASS;
}
/* Negative Tests for clReleaseContext */
REGISTER_TEST(negative_release_context)
{
cl_int err = clReleaseContext(nullptr);
test_failure_error_ret(
err, CL_INVALID_CONTEXT,
"clReleaseContext should return CL_INVALID_CONTEXT when: \"context is "
"not a valid OpenCL context\" using a nullptr",
TEST_FAIL);
return TEST_PASS;
}
/* Negative Tests for clGetContextInfo */
REGISTER_TEST(negative_get_context_info)
{
cl_uint param_value = 0;
cl_int err = clGetContextInfo(nullptr, CL_CONTEXT_REFERENCE_COUNT,
sizeof(param_value), &param_value, nullptr);
test_failure_error_ret(
err, CL_INVALID_CONTEXT,
"clGetContextInfo should return CL_INVALID_CONTEXT when: \"context is "
"not a valid context\" using a nullptr",
TEST_FAIL);
cl_context_info INVALID_PARAM_VALUE = 0;
err = clGetContextInfo(context, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clGetContextInfo should return CL_INVALID_VALUE when: \"param_name is "
"not one of the supported values\"",
TEST_FAIL);
err = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT,
sizeof(param_value) - 1, &param_value, nullptr);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clGetContextInfo should return CL_INVALID_VALUE when: \"size in bytes "
"specified by param_value_size is < size of return type and "
"param_value is not a NULL value\"",
TEST_FAIL);
return TEST_PASS;
}
/* Negative Tests for clSetContextDestructorCallback */
static void CL_CALLBACK callback(cl_context context, void* user_data) {}
REGISTER_TEST_VERSION(negative_set_context_destructor_callback, Version(3, 0))
{
cl_int err = clSetContextDestructorCallback(nullptr, callback, nullptr);
test_failure_error_ret(
err, CL_INVALID_CONTEXT,
"clSetContextDestructorCallback should return CL_INVALID_CONTEXT when: "
"\"context is not a valid context\" using a nullptr",
TEST_FAIL);
err = clSetContextDestructorCallback(context, nullptr, nullptr);
test_failure_error_ret(err, CL_INVALID_VALUE,
"clSetContextDestructorCallback should return "
"CL_INVALID_VALUE when: \"pfn_notify is NULL\"",
TEST_FAIL);
return TEST_PASS;
}

View File

@@ -0,0 +1,526 @@
//
// Copyright (c) 2021 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/testHarness.h"
#include <vector>
/* Negative Tests for clGetDeviceInfo */
REGISTER_TEST(negative_get_device_info)
{
cl_device_type device_type = 0;
cl_int err(CL_SUCCESS);
for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
{
err = clGetDeviceInfo(invalid_device, CL_DEVICE_TYPE,
sizeof(device_type), &device_type, nullptr);
test_failure_error_ret(err, CL_INVALID_DEVICE,
"clGetDeviceInfo should return "
"CL_INVALID_DEVICE when: \"device is not "
"a valid device\"",
TEST_FAIL);
}
constexpr cl_device_info INVALID_PARAM_VALUE = 0;
err = clGetDeviceInfo(device, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clGetDeviceInfo should return CL_INVALID_VALUE when: \"param_name is "
"not one of the supported values\"",
TEST_FAIL);
err = clGetDeviceInfo(device, CL_DEVICE_TYPE, 0, &device_type, nullptr);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clGetDeviceInfo should return CL_INVALID_VALUE when: \"size in bytes "
"specified by param_value_size is < size of return type and "
"param_value is not a NULL value\"",
TEST_FAIL);
return TEST_PASS;
}
/* Negative Tests for clGetDeviceIDs */
REGISTER_TEST(negative_get_device_ids)
{
    cl_platform_id platform = getPlatformFromDevice(device);
    cl_device_id devices = nullptr;
    cl_int err(CL_SUCCESS);
    // Any handle that is not a valid platform must be rejected.
    for (auto invalid_platform : get_invalid_objects<cl_platform_id>(device))
    {
        err = clGetDeviceIDs(invalid_platform, CL_DEVICE_TYPE_DEFAULT, 1,
                             &devices, nullptr);
        test_failure_error_ret(err, CL_INVALID_PLATFORM,
                               "clGetDeviceIDs should return "
                               "CL_INVALID_PLATFORM when: \"platform is "
                               "not a valid platform\"",
                               TEST_FAIL);
    }
    // 0 is not a valid cl_device_type bitfield value.
    cl_device_type INVALID_DEVICE_TYPE = 0;
    err = clGetDeviceIDs(platform, INVALID_DEVICE_TYPE, 1, &devices, nullptr);
    test_failure_error_ret(
        err, CL_INVALID_DEVICE_TYPE,
        "clGetDeviceIDs should return CL_INVALID_DEVICE_TYPE when: "
        "\"device_type is not a valid value\"",
        TEST_FAIL);
    err =
        clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 0, &devices, nullptr);
    // Fix: this failure message previously omitted the expected error code.
    test_failure_error_ret(err, CL_INVALID_VALUE,
                           "clGetDeviceIDs should return CL_INVALID_VALUE "
                           "when: \"num_entries "
                           "is equal to zero and devices is not NULL\"",
                           TEST_FAIL);
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, nullptr, nullptr);
    test_failure_error_ret(err, CL_INVALID_VALUE,
                           "clGetDeviceIDs should return CL_INVALID_VALUE "
                           "when: \"both num_devices and devices are NULL\"",
                           TEST_FAIL);
    devices = nullptr;
    // Probe every standard device type.  If some type is absent, the last
    // failing query must have reported CL_DEVICE_NOT_FOUND; if the platform
    // has every type there is nothing to check.
    std::vector<cl_device_type> device_types{ CL_DEVICE_TYPE_CPU,
                                              CL_DEVICE_TYPE_GPU,
                                              CL_DEVICE_TYPE_ACCELERATOR };
    if (get_device_cl_version(device) >= Version(1, 2))
    {
        device_types.push_back(CL_DEVICE_TYPE_CUSTOM);
    }
    bool platform_supports_all_device_types = true;
    for (auto device_type : device_types)
    {
        err = clGetDeviceIDs(platform, device_type, 1, &devices, nullptr);
        if (err == CL_SUCCESS)
        {
            continue;
        }
        platform_supports_all_device_types = false;
        break;
    }
    if (platform_supports_all_device_types)
    {
        log_info("Platform has every Device Type... Skipping Test\n");
    }
    else
    {
        test_failure_error_ret(
            err, CL_DEVICE_NOT_FOUND,
            "clGetDeviceIDs should return CL_DEVICE_NOT_FOUND when: \"no "
            "OpenCL devices that matched device_type were found\"",
            TEST_FAIL);
    }
    return TEST_PASS;
}
/* Negative Tests for clGetDeviceAndHostTimer */
REGISTER_TEST_VERSION(negative_get_device_and_host_timer, Version(2, 1))
{
    // Deliberately NULL: the invalid-device checks below must fail before
    // the implementation ever writes through the timestamp pointers.
    cl_ulong *device_timestamp = nullptr, *host_timestamp = nullptr;
    cl_int err = CL_SUCCESS;
    // Any handle that is not a valid device must be rejected.
    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
    {
        err = clGetDeviceAndHostTimer(invalid_device, device_timestamp,
                                      host_timestamp);
        test_failure_error_ret(
            err, CL_INVALID_DEVICE,
            "clGetDeviceAndHostTimer should return CL_INVALID_DEVICE when: "
            "\"device is not a valid device\"",
            TEST_FAIL);
    }
    cl_platform_id platform = getPlatformFromDevice(device);
    // Initialise timer_resolution to a Non-0 value as CL2.1/2 devices must
    // support timer synchronisation
    cl_ulong timer_resolution = 1;
    auto device_version = get_device_cl_version(device);
    err =
        clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
                          sizeof(timer_resolution), &timer_resolution, nullptr);
    test_error(err, "clGetPlatformInfo failed");
    // A zero resolution means timer synchronisation is unsupported, which is
    // only permitted for versions other than OpenCL 2.1/2.2.
    if (timer_resolution == 0
        && (device_version == Version(2, 1) || device_version == Version(2, 2)))
    {
        log_error("Support for device and host timer synchronization is "
                  "required for platforms supporting OpenCL 2.1 or 2.2.");
        return TEST_FAIL;
    }
    if (timer_resolution != 0)
    {
        // Timers are supported, so only the CL_INVALID_VALUE conditions
        // (a NULL output pointer) can be provoked.
        log_info("Platform Supports Timers\n");
        log_info("Skipping CL_INVALID_OPERATION tests\n");
        err = clGetDeviceAndHostTimer(device, nullptr, host_timestamp);
        test_failure_error_ret(
            err, CL_INVALID_VALUE,
            "clGetDeviceAndHostTimer should return CL_INVALID_VALUE when: "
            "\"host_timestamp or device_timestamp is NULL\" using nullptr for "
            "device_timestamp ",
            TEST_FAIL);
        err = clGetDeviceAndHostTimer(device, device_timestamp, nullptr);
        test_failure_error_ret(
            err, CL_INVALID_VALUE,
            "clGetDeviceAndHostTimer should return CL_INVALID_VALUE when: "
            "\"host_timestamp or device_timestamp is NULL\" using nullptr for "
            "host_timestamp ",
            TEST_FAIL);
    }
    else
    {
        // Timers are unsupported, so any call on a valid device must report
        // CL_INVALID_OPERATION.
        log_info("Platform does not Support Timers\n");
        log_info("Skipping CL_INVALID_VALUE tests\n");
        err = clGetDeviceAndHostTimer(device, device_timestamp, host_timestamp);
        test_failure_error_ret(
            err, CL_INVALID_OPERATION,
            "clGetDeviceAndHostTimer should return CL_INVALID_OPERATION when: "
            "\"the platform associated with device does not support device and "
            "host timer synchronization\"",
            TEST_FAIL);
    }
    return TEST_PASS;
}
/* Negative Tests for clGetHostTimer */
REGISTER_TEST_VERSION(negative_get_host_timer, Version(2, 1))
{
    cl_int err = CL_SUCCESS;
    cl_ulong host_timestamp = 0;

    // Anything that is not a valid device handle must be rejected.
    for (auto bad_device : get_invalid_objects<cl_device_id>(device))
    {
        err = clGetHostTimer(bad_device, &host_timestamp);
        test_failure_error_ret(err, CL_INVALID_DEVICE,
                               "clGetHostTimer should return CL_INVALID_DEVICE "
                               "when: \"device is not "
                               "a valid device\"",
                               TEST_FAIL);
    }

    // Query the host timer resolution.  Start from a non-zero value because
    // OpenCL 2.1/2.2 devices are required to support timer synchronisation.
    cl_platform_id platform = getPlatformFromDevice(device);
    cl_ulong resolution = 1;
    const auto version = get_device_cl_version(device);
    err = clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
                            sizeof(resolution), &resolution, nullptr);
    test_error(err, "clGetPlatformInfo failed");

    const bool timers_supported = (resolution != 0);
    if (!timers_supported
        && (version == Version(2, 1) || version == Version(2, 2)))
    {
        log_error("Support for device and host timer synchronization is "
                  "required for platforms supporting OpenCL 2.1 or 2.2.");
        return TEST_FAIL;
    }

    if (timers_supported)
    {
        // Timers work, so only the NULL-output-pointer condition applies.
        log_info("Platform Supports Timers\n");
        log_info("Skipping CL_INVALID_OPERATION tests\n");
        err = clGetHostTimer(device, nullptr);
        test_failure_error_ret(err, CL_INVALID_VALUE,
                               "clGetHostTimer should return CL_INVALID_VALUE "
                               "when: \"host_timestamp is NULL\"",
                               TEST_FAIL);
    }
    else
    {
        // Timers are unsupported: every call must fail with
        // CL_INVALID_OPERATION.
        log_info("Platform does not Support Timers\n");
        log_info("Skipping CL_INVALID_VALUE tests\n");
        err = clGetHostTimer(device, &host_timestamp);
        test_failure_error_ret(
            err, CL_INVALID_OPERATION,
            "clGetHostTimer should return CL_INVALID_OPERATION when: \"the "
            "platform associated with device does not support device and host "
            "timer synchronization\"",
            TEST_FAIL);
    }
    return TEST_PASS;
}
/* Negative Tests for clCreateSubDevices */
// Bitmask of the partition schemes a device advertises through
// CL_DEVICE_PARTITION_PROPERTIES (see get_supported_properties below).
enum SupportedPartitionSchemes
{
    None = 0,
    Equally = 1 << 0, // CL_DEVICE_PARTITION_EQUALLY
    Counts = 1 << 1, // CL_DEVICE_PARTITION_BY_COUNTS
    Affinity = 1 << 2, // CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN
    All_Schemes = Affinity | Counts | Equally,
};
// Returns a SupportedPartitionSchemes bitmask describing which partition
// types the device reports via CL_DEVICE_PARTITION_PROPERTIES.  On query
// failure the test_error macro returns the raw error code instead.
static int get_supported_properties(cl_device_id device)
{
    size_t number_of_properties = 0;
    int err = clGetDeviceInfo(device, CL_DEVICE_PARTITION_PROPERTIES, 0,
                              nullptr, &number_of_properties);
    test_error(err, "clGetDeviceInfo");
    // A device that does not support partitioning may report a zero-sized
    // list; previously front() was called on an empty vector, which is
    // undefined behavior.  Bail out early instead.
    if (number_of_properties == 0)
    {
        return SupportedPartitionSchemes::None;
    }
    std::vector<cl_device_partition_property> supported_properties(
        number_of_properties / sizeof(cl_device_partition_property));
    err = clGetDeviceInfo(device, CL_DEVICE_PARTITION_PROPERTIES,
                          number_of_properties, supported_properties.data(),
                          nullptr);
    test_error(err, "clGetDeviceInfo");
    // Fold the reported partition types into a bitmask; unknown values are
    // ignored.
    int ret = SupportedPartitionSchemes::None;
    for (auto property : supported_properties)
    {
        switch (property)
        {
            case CL_DEVICE_PARTITION_EQUALLY:
                ret |= SupportedPartitionSchemes::Equally;
                break;
            case CL_DEVICE_PARTITION_BY_COUNTS:
                ret |= SupportedPartitionSchemes::Counts;
                break;
            case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN:
                ret |= SupportedPartitionSchemes::Affinity;
                break;
            default: break;
        }
    }
    return ret;
}
static std::vector<cl_device_partition_property>
get_invalid_properties(int unsupported_properties)
{
if (unsupported_properties & SupportedPartitionSchemes::Equally)
{
return { CL_DEVICE_PARTITION_EQUALLY, 1, 0 };
}
else if (unsupported_properties & SupportedPartitionSchemes::Counts)
{
return { CL_DEVICE_PARTITION_BY_COUNTS, 1,
CL_DEVICE_PARTITION_BY_COUNTS_LIST_END };
}
else if (unsupported_properties & SupportedPartitionSchemes::Affinity)
{
return { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN,
CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0 };
}
else
{
return {};
}
}
// Convenience wrapper: queries a single cl_uint device property.  On query
// failure the test_error macro causes an early return of the error code.
static cl_uint get_uint_device_info(const cl_device_id device,
                                    const cl_device_info param_name)
{
    cl_uint value = 0;
    const cl_int err =
        clGetDeviceInfo(device, param_name, sizeof(value), &value, nullptr);
    test_error(err, "clGetDeviceInfo");
    return value;
}
REGISTER_TEST_VERSION(negative_create_sub_devices, Version(1, 2))
{
    int supported_properties = get_supported_properties(device);
    if (supported_properties == SupportedPartitionSchemes::None)
    {
        printf("Device does not support creating subdevices... Skipping\n");
        return TEST_SKIPPED_ITSELF;
    }
    // Build a valid partition request for one of the schemes the device
    // supports; later checks mutate this array to provoke specific errors.
    cl_device_partition_property properties[4] = {};
    cl_uint max_compute_units =
        get_uint_device_info(device, CL_DEVICE_MAX_COMPUTE_UNITS);
    cl_uint max_sub_devices =
        get_uint_device_info(device, CL_DEVICE_PARTITION_MAX_SUB_DEVICES);
    std::vector<cl_device_id> out_devices;
    cl_uint max_for_partition = 0;
    if (supported_properties & SupportedPartitionSchemes::Equally)
    {
        properties[0] = CL_DEVICE_PARTITION_EQUALLY;
        properties[1] = 1;
        properties[2] = 0;
        out_devices.resize(static_cast<size_t>(max_compute_units));
        max_for_partition = max_compute_units;
    }
    else if (supported_properties & SupportedPartitionSchemes::Counts)
    {
        properties[0] = CL_DEVICE_PARTITION_BY_COUNTS;
        properties[1] = 1;
        properties[2] = CL_DEVICE_PARTITION_BY_COUNTS_LIST_END;
        out_devices.resize(static_cast<size_t>(max_sub_devices));
        max_for_partition = max_sub_devices;
    }
    else
    {
        // NOTE(review): in the affinity-only case out_devices stays empty,
        // so out_devices.data() may be NULL in the calls below -- confirm
        // this is intended for the "num_devices too small" check.
        properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
        properties[1] = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE;
        properties[2] = 0;
    }
    properties[3] = 0;
    cl_int err(CL_SUCCESS);
    // Any handle that is not a valid device must be rejected.
    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
    {
        err = clCreateSubDevices(invalid_device, properties, out_devices.size(),
                                 out_devices.data(), nullptr);
        test_failure_error_ret(err, CL_INVALID_DEVICE,
                               "clCreateSubDevices should return "
                               "CL_INVALID_DEVICE when: \"in_device "
                               "is not a valid device\"",
                               TEST_FAIL);
    }
    // A NULL properties pointer is not a valid property list.
    err = clCreateSubDevices(device, nullptr, out_devices.size(),
                             out_devices.data(), nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
        "specified in properties are not valid\" using a nullptr",
        TEST_FAIL);
    // num_devices smaller than the sub-device count the scheme would create.
    err =
        clCreateSubDevices(device, properties, 0, out_devices.data(), nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clCreateSubDevices should return CL_INVALID_VALUE when: \"out_devices "
        "is not NULL and num_devices is less than the number of sub-devices "
        "created by the partition scheme\"",
        TEST_FAIL);
    // A well-formed property list using a scheme this device does not
    // support must also be rejected.
    if (supported_properties != SupportedPartitionSchemes::All_Schemes)
    {
        std::vector<cl_device_partition_property> invalid_properties =
            get_invalid_properties(supported_properties
                                   ^ SupportedPartitionSchemes::All_Schemes);
        err =
            clCreateSubDevices(device, invalid_properties.data(),
                               out_devices.size(), out_devices.data(), nullptr);
        test_failure_error_ret(
            err, CL_INVALID_VALUE,
            "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
            "specified in properties are valid but not supported by the "
            "device\"",
            TEST_FAIL);
    }
    if (supported_properties & SupportedPartitionSchemes::Equally)
    {
        // One sub-device claiming ALL compute units leaves nothing to
        // partition further.
        properties[1] = max_compute_units;
        err = clCreateSubDevices(device, properties, max_for_partition,
                                 out_devices.data(), nullptr);
        test_failure_error_ret(
            err, CL_DEVICE_PARTITION_FAILED,
            "clCreateSubDevices should return CL_DEVICE_PARTITION_FAILED when: "
            "\"the partition name is supported by the implementation but "
            "in_device could not be further partitioned\"",
            TEST_FAIL);
    }
    constexpr cl_device_partition_property INVALID_PARTITION_PROPERTY =
        -1; // Arbitrary Invalid number
    properties[0] = INVALID_PARTITION_PROPERTY;
    err = clCreateSubDevices(device, properties, out_devices.size(),
                             out_devices.data(), nullptr);
    test_failure_error_ret(
        err, CL_INVALID_VALUE,
        "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
        "specified in properties are not valid\" using an invalid property",
        TEST_FAIL);
    if (supported_properties & SupportedPartitionSchemes::Counts)
    {
        // Request more sub-devices than CL_DEVICE_PARTITION_MAX_SUB_DEVICES.
        // Fix: the message text previously contained a stray space inside
        // "CL_DEVICE_PARTITION_BY_COUNTS".
        properties[0] = CL_DEVICE_PARTITION_BY_COUNTS;
        properties[1] = max_sub_devices + 1;
        err = clCreateSubDevices(device, properties, max_sub_devices + 1,
                                 out_devices.data(), nullptr);
        test_failure_error_ret(
            err, CL_INVALID_DEVICE_PARTITION_COUNT,
            "clCreateSubDevices should return "
            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the partition name "
            "specified in properties is CL_DEVICE_PARTITION_BY_COUNTS and the "
            "number of sub-devices requested exceeds "
            "CL_DEVICE_PARTITION_MAX_SUB_DEVICES\"",
            TEST_FAIL);
        // A negative compute-unit count is never valid.
        properties[1] = -1;
        err = clCreateSubDevices(device, properties, out_devices.size(),
                                 out_devices.data(), nullptr);
        test_failure_error_ret(
            err, CL_INVALID_DEVICE_PARTITION_COUNT,
            "clCreateSubDevices should return "
            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the number of compute "
            "units requested for one or more sub-devices is less than zero\"",
            TEST_FAIL);
    }
    if (supported_properties & SupportedPartitionSchemes::Equally)
    {
        // Request more compute units per sub-device than the device has.
        properties[0] = CL_DEVICE_PARTITION_EQUALLY;
        properties[1] = max_compute_units + 1;
        err = clCreateSubDevices(device, properties, max_compute_units + 1,
                                 out_devices.data(), nullptr);
        test_failure_error_ret(
            err, CL_INVALID_DEVICE_PARTITION_COUNT,
            "clCreateSubDevices should return "
            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the total number of "
            "compute units requested exceeds CL_DEVICE_MAX_COMPUTE_UNITS for "
            "in_device\"",
            TEST_FAIL);
    }
    return TEST_PASS;
}
/* Negative Tests for clRetainDevice */
REGISTER_TEST_VERSION(negative_retain_device, Version(1, 2))
{
    // Retaining anything that is not a valid device must be rejected.
    for (auto bad_device : get_invalid_objects<cl_device_id>(device))
    {
        const cl_int err = clRetainDevice(bad_device);
        test_failure_error_ret(err, CL_INVALID_DEVICE,
                               "clRetainDevice should return CL_INVALID_DEVICE "
                               "when: \"device is not "
                               "a valid device\"",
                               TEST_FAIL);
    }
    return TEST_PASS;
}
/* Negative Tests for clReleaseDevice */
REGISTER_TEST_VERSION(negative_release_device, Version(1, 2))
{
    // Releasing anything that is not a valid device must be rejected.
    for (auto bad_device : get_invalid_objects<cl_device_id>(device))
    {
        const cl_int err = clReleaseDevice(bad_device);
        test_failure_error_ret(err, CL_INVALID_DEVICE,
                               "clReleaseDevice should return "
                               "CL_INVALID_DEVICE when: \"device is not "
                               "a valid device\"",
                               TEST_FAIL);
    }
    return TEST_PASS;
}

View File

@@ -0,0 +1,97 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "testBase.h"
#include "harness/typeWrappers.h"
REGISTER_TEST(negative_enqueue_marker_with_wait_list)
{
    // Create a second context on the same device so we can make an event
    // whose context differs from the one that owns 'queue'.
    cl_platform_id platform = getPlatformFromDevice(device);
    cl_context_properties props[3] = {
        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platform),
        0
    };
    cl_int err = CL_SUCCESS;
    clContextWrapper ctx =
        clCreateContext(props, 1, &device, nullptr, nullptr, &err);
    test_error(err, "clCreateContext failed");
    cl_event ret_event = nullptr;
    // NULL command queue -> CL_INVALID_COMMAND_QUEUE, and no event may be
    // returned on failure.
    err = clEnqueueMarkerWithWaitList(nullptr, 0, nullptr, &ret_event);
    test_failure_error_ret(err, CL_INVALID_COMMAND_QUEUE,
                           "clEnqueueMarkerWithWaitList should return "
                           "CL_INVALID_COMMAND_QUEUE when: \"command_queue is "
                           "not a valid host command-queue\" using a nullptr",
                           TEST_FAIL);
    test_assert_error(ret_event == nullptr,
                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
                      "should be created");
    // Wait-list event created in a different context ->
    // CL_INVALID_CONTEXT.
    clEventWrapper different_ctx_event = clCreateUserEvent(ctx, &err);
    test_error(err, "clCreateUserEvent failed");
    err =
        clEnqueueMarkerWithWaitList(queue, 1, &different_ctx_event, &ret_event);
    test_failure_error_ret(
        err, CL_INVALID_CONTEXT,
        "clEnqueueMarkerWithWaitList should return CL_INVALID_CONTEXT when: "
        "\"The context of both the command queue and the events in ret_event "
        "wait list are not the same\"",
        TEST_FAIL);
    test_assert_error(ret_event == nullptr,
                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
                      "should be created");
    // Non-zero count with a NULL wait list -> CL_INVALID_EVENT_WAIT_LIST.
    err = clEnqueueMarkerWithWaitList(queue, 1, nullptr, &ret_event);
    test_failure_error_ret(
        err, CL_INVALID_EVENT_WAIT_LIST,
        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
        "when: \"num_events_in_wait_list > 0 but event_wait_list is NULL\"",
        TEST_FAIL);
    test_assert_error(ret_event == nullptr,
                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
                      "should be created");
    // Zero count with a non-NULL wait list -> CL_INVALID_EVENT_WAIT_LIST.
    clEventWrapper event = clCreateUserEvent(context, &err);
    test_error(err, "clCreateUserEvent failed");
    err = clEnqueueMarkerWithWaitList(queue, 0, &event, &ret_event);
    test_failure_error_ret(
        err, CL_INVALID_EVENT_WAIT_LIST,
        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
        "when: \"num_events_in_wait_list is 0 but event_wait_list is not "
        "NULL\"",
        TEST_FAIL);
    test_assert_error(ret_event == nullptr,
                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
                      "should be created");
    // A wait list containing a NULL event -> CL_INVALID_EVENT_WAIT_LIST.
    cl_event invalid_event_wait_list[] = { nullptr };
    err = clEnqueueMarkerWithWaitList(queue, 1, invalid_event_wait_list,
                                      &ret_event);
    test_failure_error_ret(
        err, CL_INVALID_EVENT_WAIT_LIST,
        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
        "when: \"event objects in event_wait_list are not valid events\"",
        TEST_FAIL);
    test_assert_error(ret_event == nullptr,
                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
                      "should be created");
    return TEST_PASS;
}

View File

@@ -40,9 +40,20 @@ REGISTER_TEST(negative_get_platform_info)
{ {
cl_platform_id platform = getPlatformFromDevice(device); cl_platform_id platform = getPlatformFromDevice(device);
cl_int err(CL_SUCCESS);
for (auto invalid_platform : get_invalid_objects<cl_platform_id>(device))
{
err = clGetPlatformInfo(invalid_platform, CL_PLATFORM_VERSION,
sizeof(char*), nullptr, nullptr);
test_failure_error_ret(err, CL_INVALID_PLATFORM,
"clGetPlatformInfo should return "
"CL_INVALID_PLATFORM when: \"platform "
"is not a valid platform\"",
TEST_FAIL);
}
constexpr cl_platform_info INVALID_PARAM_VALUE = 0; constexpr cl_platform_info INVALID_PARAM_VALUE = 0;
cl_int err = err = clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
test_failure_error_ret( test_failure_error_ret(
err, CL_INVALID_VALUE, err, CL_INVALID_VALUE,
"clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name " "clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name "

View File

@@ -348,18 +348,27 @@ REGISTER_TEST_VERSION(negative_set_default_device_command_queue, Version(2, 1))
return TEST_FAIL; return TEST_FAIL;
} }
{
cl_command_queue_properties queue_properties;
err =
clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(queue_properties), &queue_properties, NULL);
test_error(err, "Unable to query CL_DEVICE_QUEUE_PROPERTIES");
if (queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
{ {
constexpr cl_queue_properties props[] = { constexpr cl_queue_properties props[] = {
CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
}; };
clCommandQueueWrapper not_on_device_queue = clCommandQueueWrapper not_on_device_queue =
clCreateCommandQueueWithProperties(context, device, props, &err); clCreateCommandQueueWithProperties(context, device, props,
&err);
test_error_fail(err, "clCreateCommandQueueWithProperties failed"); test_error_fail(err, "clCreateCommandQueueWithProperties failed");
err = clSetDefaultDeviceCommandQueue(context, device, err = clSetDefaultDeviceCommandQueue(context, device,
not_on_device_queue); not_on_device_queue);
if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE) if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
{ {
log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n", log_error(
"ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
"clSetDefaultDeviceCommandQueue should return " "clSetDefaultDeviceCommandQueue should return "
"CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: " "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: "
"\"command_queue is not a valid command-queue for " "\"command_queue is not a valid command-queue for "
@@ -369,6 +378,7 @@ REGISTER_TEST_VERSION(negative_set_default_device_command_queue, Version(2, 1))
__FILE__, __LINE__); __FILE__, __LINE__);
} }
} }
}
return TEST_PASS; return TEST_PASS;
} }

View File

@@ -1526,7 +1526,7 @@ REGISTER_TEST(min_max_constant_buffer_size)
size_t threads[1], localThreads[1]; size_t threads[1], localThreads[1];
cl_int *constantData, *resultData; cl_int *constantData, *resultData;
cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize; cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize;
int i; size_t i;
cl_event event; cl_event event;
cl_int event_status; cl_int event_status;
MTdata d; MTdata d;
@@ -1556,6 +1556,8 @@ REGISTER_TEST(min_max_constant_buffer_size)
maxAllocSize = get_device_info_max_mem_alloc_size( maxAllocSize = get_device_info_max_mem_alloc_size(
device, MAX_DEVICE_MEMORY_SIZE_DIVISOR); device, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
log_info("Reported max alloc size of %" PRIu64 " bytes.\n",
(uint64_t)maxAllocSize);
if (maxSize > maxAllocSize) maxSize = maxAllocSize; if (maxSize > maxAllocSize) maxSize = maxAllocSize;
@@ -1590,7 +1592,7 @@ REGISTER_TEST(min_max_constant_buffer_size)
return EXIT_FAILURE; return EXIT_FAILURE;
} }
for (i = 0; i < (int)(numberOfInts); i++) for (i = 0; i < numberOfInts; i++)
constantData[i] = (int)genrand_int32(d); constantData[i] = (int)genrand_int32(d);
clMemWrapper streams[3]; clMemWrapper streams[3];
@@ -1678,11 +1680,11 @@ REGISTER_TEST(min_max_constant_buffer_size)
sizeToAllocate, resultData, 0, NULL, NULL); sizeToAllocate, resultData, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed"); test_error(error, "clEnqueueReadBuffer failed");
for (i = 0; i < (int)(numberOfInts); i++) for (i = 0; i < numberOfInts; i++)
if (constantData[i] != resultData[i]) if (constantData[i] != resultData[i])
{ {
log_error("Data failed to verify: constantData[%d]=%d != " log_error("Data failed to verify: constantData[%zu]=%d != "
"resultData[%d]=%d\n", "resultData[%zu]=%d\n",
i, constantData[i], i, resultData[i]); i, constantData[i], i, resultData[i]);
free(constantData); free(constantData);
free(resultData); free(resultData);

View File

@@ -487,10 +487,10 @@ compare_kernel_with_expected(cl_context context, cl_device_id device,
int failed_tests = 0; int failed_tests = 0;
clKernelWrapper kernel; clKernelWrapper kernel;
clProgramWrapper program; clProgramWrapper program;
cl_int err = create_single_kernel_helper_with_build_options( cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info", &kernel_src, "get_kernel_arg_info",
get_build_options(device).c_str()); get_build_options(device).c_str());
test_error(err, "create_single_kernel_helper_with_build_options"); test_error(err, "create_single_kernel_helper");
for (size_t i = 0; i < expected_args.size(); ++i) for (size_t i = 0; i < expected_args.size(); ++i)
{ {
KernelArgInfo actual; KernelArgInfo actual;
@@ -874,11 +874,10 @@ static int test_null_param(cl_context context, cl_device_id device,
{ {
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
cl_int err = create_single_kernel_helper_with_build_options( cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info", &kernel_src, "get_kernel_arg_info",
get_build_options(device).c_str()); get_build_options(device).c_str());
test_error_ret(err, "create_single_kernel_helper_with_build_options", test_error_ret(err, "create_single_kernel_helper", TEST_FAIL);
TEST_FAIL);
err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
CL_KERNEL_ARG_ADDRESS_QUALIFIER, 0, nullptr, CL_KERNEL_ARG_ADDRESS_QUALIFIER, 0, nullptr,
@@ -916,12 +915,11 @@ static int test_arg_name_size(cl_context context, cl_device_id device,
char arg_return[sizeof(KERNEL_ARGUMENT_NAME) + 1]; char arg_return[sizeof(KERNEL_ARGUMENT_NAME) + 1];
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
cl_int err = create_single_kernel_helper_with_build_options( cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info", &kernel_src, "get_kernel_arg_info",
get_build_options(device).c_str()); get_build_options(device).c_str());
test_error_ret(err, "create_single_kernel_helper_with_build_options", test_error_ret(err, "create_single_kernel_helper", TEST_FAIL);
TEST_FAIL);
err = err =
clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, CL_KERNEL_ARG_NAME, clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, CL_KERNEL_ARG_NAME,

View File

@@ -382,17 +382,18 @@ REGISTER_TEST(null_required_work_group_size)
clMemWrapper dst; clMemWrapper dst;
dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int), dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
nullptr, &error); nullptr, &error);
test_error(error, "clCreateBuffer failed");
struct KernelAttribInfo struct KernelAttribInfo
{ {
std::string str; cl_int wgs[3];
cl_uint max_dim; cl_uint min_dim;
}; };
std::vector<KernelAttribInfo> attribs; std::vector<KernelAttribInfo> attribs;
attribs.push_back({ "__attribute__((reqd_work_group_size(2,1,1)))", 1 }); attribs.push_back({ { 2, 1, 1 }, 1 });
attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,1)))", 2 }); attribs.push_back({ { 2, 3, 1 }, 2 });
attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,4)))", 3 }); attribs.push_back({ { 2, 3, 4 }, 3 });
const std::string body_str = R"( const std::string body_str = R"(
__kernel void wg_size(__global int* dst) __kernel void wg_size(__global int* dst)
@@ -409,7 +410,11 @@ REGISTER_TEST(null_required_work_group_size)
for (auto& attrib : attribs) for (auto& attrib : attribs)
{ {
const std::string source_str = attrib.str + body_str; const std::string attrib_str = "__attribute__((reqd_work_group_size("
+ std::to_string(attrib.wgs[0]) + ","
+ std::to_string(attrib.wgs[1]) + ","
+ std::to_string(attrib.wgs[2]) + ")))";
const std::string source_str = attrib_str + body_str;
const char* source = source_str.c_str(); const char* source = source_str.c_str();
clProgramWrapper program; clProgramWrapper program;
@@ -421,21 +426,19 @@ REGISTER_TEST(null_required_work_group_size)
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst); error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
for (cl_uint work_dim = 1; work_dim <= attrib.max_dim; work_dim++) for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++)
{ {
const cl_int expected[3] = { 2, work_dim >= 2 ? 3 : 1,
work_dim >= 3 ? 4 : 1 };
const size_t test_work_group_size = const size_t test_work_group_size =
expected[0] * expected[1] * expected[2]; attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2];
if ((size_t)expected[0] > device_max_work_item_sizes[0] if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0]
|| (size_t)expected[1] > device_max_work_item_sizes[1] || (size_t)attrib.wgs[1] > device_max_work_item_sizes[1]
|| (size_t)expected[2] > device_max_work_item_sizes[2] || (size_t)attrib.wgs[2] > device_max_work_item_sizes[2]
|| test_work_group_size > device_max_work_group_size) || test_work_group_size > device_max_work_group_size)
{ {
log_info("Skipping test for work_dim = %u: required work group " log_info("Skipping test for work_dim = %u: required work group "
"size (%i, %i, %i) (total %zu) exceeds device max " "size (%i, %i, %i) (total %zu) exceeds device max "
"work group size (%zu, %zu, %zu) (total %zu)\n", "work group size (%zu, %zu, %zu) (total %zu)\n",
work_dim, expected[0], expected[1], expected[2], work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
test_work_group_size, device_max_work_item_sizes[0], test_work_group_size, device_max_work_item_sizes[0],
device_max_work_item_sizes[1], device_max_work_item_sizes[1],
device_max_work_item_sizes[2], device_max_work_item_sizes[2],
@@ -444,8 +447,10 @@ REGISTER_TEST(null_required_work_group_size)
} }
const cl_int zero = 0; const cl_int zero = 0;
error = clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0, error =
sizeof(expected), 0, nullptr, nullptr); clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
sizeof(attrib.wgs), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 }; const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr, error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr,
@@ -458,12 +463,12 @@ REGISTER_TEST(null_required_work_group_size)
results, 0, nullptr, nullptr); results, 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed"); test_error(error, "clEnqueueReadBuffer failed");
if (results[0] != expected[0] || results[1] != expected[1] if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1]
|| results[2] != expected[2]) || results[2] != attrib.wgs[2])
{ {
log_error("Executed local size mismatch with work_dim = %u: " log_error("Executed local size mismatch with work_dim = %u: "
"Expected (%d,%d,%d) got (%d,%d,%d)\n", "Expected (%d,%d,%d) got (%d,%d,%d)\n",
work_dim, expected[0], expected[1], expected[2], work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
results[0], results[1], results[2]); results[0], results[1], results[2]);
return TEST_FAIL; return TEST_FAIL;
} }
@@ -477,15 +482,15 @@ REGISTER_TEST(null_required_work_group_size)
test_error(error, test_error(error,
"clGetKernelSuggestedLocalWorkSizeKHR failed"); "clGetKernelSuggestedLocalWorkSizeKHR failed");
if ((cl_int)suggested[0] != expected[0] if (suggested[0] != (size_t)attrib.wgs[0]
|| (cl_int)suggested[1] != expected[1] || suggested[1] != (size_t)attrib.wgs[1]
|| (cl_int)suggested[2] != expected[2]) || suggested[2] != (size_t)attrib.wgs[2])
{ {
log_error("Suggested local size mismatch with work_dim = " log_error("Suggested local size mismatch with work_dim = "
"%u: Expected (%d,%d,%d) got (%d,%d,%d)\n", "%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n",
work_dim, expected[0], expected[1], expected[2], work_dim, attrib.wgs[0], attrib.wgs[1],
(cl_int)suggested[0], (cl_int)suggested[1], attrib.wgs[2], suggested[0], suggested[1],
(cl_int)suggested[2]); suggested[2]);
return TEST_FAIL; return TEST_FAIL;
} }
} }

View File

@@ -982,6 +982,12 @@ REGISTER_TEST(negative_invalid_arg_index)
REGISTER_TEST(negative_invalid_arg_size_local) REGISTER_TEST(negative_invalid_arg_size_local)
{ {
if (true)
{
log_info("Disabling this test temporarily, see internal issue 374.\n");
return TEST_SKIPPED_ITSELF;
}
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper local_arg_kernel; clKernelWrapper local_arg_kernel;

View File

@@ -86,7 +86,8 @@ REGISTER_TEST(queue_hint)
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, queue_hint_test_kernel, "vec_cpy", NULL); err = create_single_kernel_helper(context, &program, &kernel, 1,
queue_hint_test_kernel, "vec_cpy");
if (err != 0) if (err != 0)
{ {
return err; return err;

View File

@@ -757,7 +757,7 @@ REGISTER_TEST(spirv_query_dependencies)
} }
for (const auto& extension_dep : it->second.extensions) for (const auto& extension_dep : it->second.extensions)
{ {
log_error("Checked for SPIR-V extension %s.n", log_error("Checked for SPIR-V extension %s.\n",
extension_dep.c_str()); extension_dep.c_str());
} }
return TEST_FAIL; return TEST_FAIL;

View File

@@ -188,9 +188,17 @@ REGISTER_TEST(arrayimagecopy)
{ {
PASSIVE_REQUIRE_IMAGE_SUPPORT(device) PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE, int error = test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
test_arrayimagecopy_single_format); test_arrayimagecopy_single_format);
if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
{
error |= test_arrayimagecommon(
device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
test_arrayimagecopy_single_format);
}
return error;
} }
@@ -198,7 +206,15 @@ REGISTER_TEST(arrayimagecopy3d)
{ {
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device) PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE, int error = test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D,
test_arrayimagecopy_single_format); test_arrayimagecopy_single_format);
if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
{
error |= test_arrayimagecommon(
device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D,
test_arrayimagecopy_single_format);
}
return error;
} }

View File

@@ -72,16 +72,36 @@ static int test_arrayreadwrite_impl(cl_device_id device, cl_context context,
err = clEnqueueWriteBuffer( err = clEnqueueWriteBuffer(
queue, buffer, CL_TRUE, offset * sizeof(cl_uint), queue, buffer, CL_TRUE, offset * sizeof(cl_uint),
sizeof(cl_uint) * cb, &reference_vals[offset], 0, nullptr, nullptr); sizeof(cl_uint) * cb, &reference_vals[offset], 0, nullptr, nullptr);
if (flags & CL_MEM_IMMUTABLE_EXT)
{
test_failure_error_ret(err, CL_INVALID_OPERATION,
"clEnqueueWriteBuffer is expected to fail "
"with CL_INVALID_OPERATION when the buffer "
"is created with CL_MEM_IMMUTABLE_EXT",
TEST_FAIL);
}
else
{
test_error(err, "clEnqueueWriteBuffer failed"); test_error(err, "clEnqueueWriteBuffer failed");
}
err = clEnqueueReadBuffer( err = clEnqueueReadBuffer(
queue, buffer, CL_TRUE, offset * sizeof(cl_uint), queue, buffer, CL_TRUE, offset * sizeof(cl_uint),
cb * sizeof(cl_uint), &outptr[offset], 0, nullptr, nullptr); cb * sizeof(cl_uint), &outptr[offset], 0, nullptr, nullptr);
test_error(err, "clEnqueueReadBuffer failed"); test_error(err, "clEnqueueReadBuffer failed");
const cl_uint* expected_buffer_values = nullptr;
if (flags & CL_MEM_IMMUTABLE_EXT)
{
expected_buffer_values = inptr.data();
}
else
{
expected_buffer_values = reference_vals.data();
}
for (int j = offset; j < offset + cb; j++) for (int j = offset; j < offset + cb; j++)
{ {
if (reference_vals[j] != outptr[j]) if (expected_buffer_values[j] != outptr[j])
{ {
log_error("ARRAY read, write test failed\n"); log_error("ARRAY read, write test failed\n");
err = -1; err = -1;
@@ -105,3 +125,11 @@ REGISTER_TEST(arrayreadwrite)
return test_arrayreadwrite_impl(device, context, queue, num_elements, return test_arrayreadwrite_impl(device, context, queue, num_elements,
CL_MEM_READ_WRITE); CL_MEM_READ_WRITE);
} }
REGISTER_TEST(immutable_arrayreadwrite)
{
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
return test_arrayreadwrite_impl(device, context, queue, num_elements,
CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR);
}

View File

@@ -14,6 +14,7 @@
// limitations under the License. // limitations under the License.
// //
#include "harness/compat.h" #include "harness/compat.h"
#include "errorHelpers.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@@ -194,6 +195,43 @@ int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, si
return 0; return 0;
} }
int immutable_copy_region(size_t src, size_t soffset[3], size_t sregion[3],
size_t dst, size_t doffset[3], size_t dregion[3])
{
// Copy between cl buffers.
size_t src_slice_pitch =
(width[src] * height[src] != 1) ? width[src] * height[src] : 0;
size_t dst_slice_pitch =
(width[dst] * height[dst] != 1) ? width[dst] * height[dst] : 0;
size_t src_row_pitch = width[src];
cl_int err;
if (check_overlap_rect(soffset, doffset, sregion, src_row_pitch,
src_slice_pitch))
{
log_info("Copy overlap reported, skipping copy buffer rect\n");
return CL_SUCCESS;
}
else
{
err = clEnqueueCopyBufferRect(gQueue, buffer[src], buffer[dst], soffset,
doffset, sregion, /*dregion,*/
width[src], src_slice_pitch, width[dst],
dst_slice_pitch, 0, nullptr, nullptr);
if (err != CL_INVALID_OPERATION)
{
log_error(
"clEnqueueCopyBufferRect should return "
"CL_INVALID_OPERATION but returned %s between %zu and %zu",
IGetErrorString(err), src, dst);
return TEST_FAIL;
}
}
return TEST_PASS;
}
// This function compares the destination region in the buffer pointed // This function compares the destination region in the buffer pointed
// to by device, to the source region of the specified verify buffer. // to by device, to the source region of the specified verify buffer.
int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) { int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) {
@@ -337,6 +375,32 @@ int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, s
return 0; return 0;
} }
int immutable_write_region(size_t src, size_t soffset[3], size_t sregion[3],
size_t dst, size_t doffset[3], size_t dregion[3])
{
initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt);
size_t src_slice_pitch =
(width[src] * height[src] != 1) ? width[src] * height[src] : 0;
size_t dst_slice_pitch =
(width[dst] * height[dst] != 1) ? width[dst] * height[dst] : 0;
cl_int error = clEnqueueWriteBufferRect(
gQueue, buffer[dst], CL_TRUE, doffset, soffset, dregion, width[dst],
dst_slice_pitch, width[src], src_slice_pitch, tmp_buffer, 0, nullptr,
nullptr);
if (error != CL_INVALID_OPERATION)
{
log_error("clEnqueueWriteBufferRect should return CL_INVALID_OPERATION "
"but retured %s between %zu and %zu",
IGetErrorString(error), src, dst);
return TEST_FAIL;
}
return TEST_PASS;
}
void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data ) void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
{ {
free( data ); free( data );
@@ -591,3 +655,16 @@ REGISTER_TEST(bufferreadwriterect)
device, context, queue, num_elements, device, context, queue, num_elements,
CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, test_functions); CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, test_functions);
} }
REGISTER_TEST(immutable_bufferreadwriterect)
{
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
TestFunctions test_functions;
test_functions.copy = immutable_copy_region;
test_functions.read = read_verify_region;
test_functions.write = immutable_write_region;
return test_bufferreadwriterect_impl(
device, context, queue, num_elements,
CL_MEM_USE_HOST_PTR | CL_MEM_IMMUTABLE_EXT, test_functions);
}

View File

@@ -103,11 +103,11 @@ REGISTER_TEST_VERSION(enqueued_local_size, Version(2, 0))
std::string cl_std = "-cl-std=CL"; std::string cl_std = "-cl-std=CL";
cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0"; cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
err = create_single_kernel_helper_with_build_options( err = create_single_kernel_helper(
context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code, context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
"test_enqueued_local_size_1d", cl_std.c_str()); "test_enqueued_local_size_1d", cl_std.c_str());
test_error(err, "create_single_kernel_helper failed"); test_error(err, "create_single_kernel_helper failed");
err = create_single_kernel_helper_with_build_options( err = create_single_kernel_helper(
context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code, context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
"test_enqueued_local_size_2d", cl_std.c_str()); "test_enqueued_local_size_2d", cl_std.c_str());
test_error(err, "create_single_kernel_helper failed"); test_error(err, "create_single_kernel_helper failed");

View File

@@ -27,6 +27,82 @@ using test_function_t = int (*)(cl_device_id, cl_context, cl_command_queue,
cl_mem_flags, cl_mem_flags, cl_mem_object_type, cl_mem_flags, cl_mem_flags, cl_mem_object_type,
const cl_image_format *); const cl_image_format *);
static int test_negative_imagearraycopy_single_format(
cl_device_id device, cl_context context, cl_command_queue queue,
cl_mem_flags image_flags, cl_mem_flags buffer_flags,
cl_mem_object_type image_type, const cl_image_format *format)
{
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
imgptr{ nullptr, free };
clMemWrapper image;
clMemWrapper buffer;
const size_t img_width = 512;
const size_t img_height = 512;
const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
size_t elem_size;
size_t buffer_size;
cl_int err;
RandomSeed seed(gRandomSeed);
const size_t origin[3] = { 0, 0, 0 },
region[3] = { img_width, img_height, img_depth };
log_info("Testing %s %s\n",
GetChannelOrderName(format->image_channel_order),
GetChannelTypeName(format->image_channel_data_type));
elem_size = get_pixel_size(format);
buffer_size =
sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
if (image_flags & CL_MEM_USE_HOST_PTR || image_flags & CL_MEM_COPY_HOST_PTR)
{
imgptr.reset(static_cast<cl_uchar *>(
create_random_data(kUChar, seed, buffer_size)));
}
bufptr.reset(
static_cast<cl_uchar *>(create_random_data(kUChar, seed, buffer_size)));
if (CL_MEM_OBJECT_IMAGE2D == image_type)
{
image = create_image_2d(context, image_flags, format, img_width,
img_height, 0, imgptr.get(), &err);
}
else
{
image =
create_image_3d(context, image_flags, format, img_width, img_height,
img_depth, 0, 0, imgptr.get(), &err);
}
test_error(err, "create_image_xd failed");
if (!(image_flags & CL_MEM_USE_HOST_PTR
|| image_flags & CL_MEM_COPY_HOST_PTR))
{
imgptr.reset(static_cast<cl_uchar *>(
create_random_data(kUChar, seed, buffer_size)));
err = clEnqueueWriteImage(queue, image, CL_TRUE, origin, region, 0, 0,
imgptr.get(), 0, nullptr, nullptr);
test_error(err, "clEnqueueWriteImage failed");
}
buffer =
clCreateBuffer(context, buffer_flags, buffer_size, bufptr.get(), &err);
test_error(err, "clCreateBuffer failed");
err = clEnqueueCopyImageToBuffer(queue, image, buffer, origin, region, 0, 0,
nullptr, nullptr);
test_failure_error_ret(
err, CL_INVALID_OPERATION,
"clEnqueueCopyImageToBuffer should return CL_INVALID_OPERATION when: "
"\" dst_buffer is created with CL_MEM_IMMUTABLE_EXT flag\"",
TEST_FAIL);
return TEST_PASS;
}
static int test_imagearraycopy_single_format( static int test_imagearraycopy_single_format(
cl_device_id device, cl_context context, cl_command_queue queue, cl_device_id device, cl_context context, cl_command_queue queue,
cl_mem_flags image_flags, cl_mem_flags buffer_flags, cl_mem_flags image_flags, cl_mem_flags buffer_flags,
@@ -188,9 +264,18 @@ REGISTER_TEST(imagearraycopy)
{ {
PASSIVE_REQUIRE_IMAGE_SUPPORT(device) PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE, int error = test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
test_imagearraycopy_single_format); test_imagearraycopy_single_format);
if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
{
error |= test_imagearraycommon(
device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
test_imagearraycopy_single_format);
}
return error;
} }
@@ -198,7 +283,38 @@ REGISTER_TEST(imagearraycopy3d)
{ {
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device) PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY, int error = test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D,
test_imagearraycopy_single_format); test_imagearraycopy_single_format);
if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
{
error |= test_imagearraycommon(
device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D,
test_imagearraycopy_single_format);
}
return error;
}
REGISTER_TEST(negative_imagearraycopy)
{
PASSIVE_REQUIRE_IMAGE_SUPPORT(device);
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_OBJECT_IMAGE2D,
test_negative_imagearraycopy_single_format);
}
REGISTER_TEST(negative_imagearraycopy3d)
{
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device);
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_OBJECT_IMAGE3D,
test_negative_imagearraycopy_single_format);
} }

View File

@@ -50,8 +50,8 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty
{ {
sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
} }
cl_int err = create_single_kernel_helper_with_build_options( cl_int err = create_single_kernel_helper(context, &p, &k, 4,
context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr); sizeof_kernel_code, "test_sizeof");
test_error(err, "Failed to build kernel/program."); test_error(err, "Failed to build kernel/program.");
m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err ); m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err );

View File

@@ -19,19 +19,24 @@
#include "testBase.h" #include "testBase.h"
const cl_mem_flags flag_set[] = { const cl_mem_flags flag_set[] = { CL_MEM_ALLOC_HOST_PTR,
CL_MEM_ALLOC_HOST_PTR,
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_USE_HOST_PTR, CL_MEM_USE_HOST_PTR,
CL_MEM_COPY_HOST_PTR, CL_MEM_COPY_HOST_PTR,
0 0,
}; CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR,
CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR
| CL_MEM_ALLOC_HOST_PTR };
const char* flag_set_names[] = { const char* flag_set_names[] = {
"CL_MEM_ALLOC_HOST_PTR", "CL_MEM_ALLOC_HOST_PTR",
"CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR", "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
"CL_MEM_USE_HOST_PTR", "CL_MEM_USE_HOST_PTR",
"CL_MEM_COPY_HOST_PTR", "CL_MEM_COPY_HOST_PTR",
"0" "0",
"CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR",
"CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR",
"CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR",
}; };
int main( int argc, const char *argv[] ) int main( int argc, const char *argv[] )

View File

@@ -25,6 +25,6 @@
extern const cl_mem_flags flag_set[]; extern const cl_mem_flags flag_set[];
extern const char* flag_set_names[]; extern const char* flag_set_names[];
#define NUM_FLAGS 5 #define NUM_FLAGS 8
#endif // _testBase_h #endif // _testBase_h

View File

@@ -39,7 +39,8 @@ static int verify_copy_buffer(int *inptr, int *outptr, int n)
using alignedOwningPtr = std::unique_ptr<cl_int[], decltype(&align_free)>; using alignedOwningPtr = std::unique_ptr<cl_int[], decltype(&align_free)>;
static int test_copy( cl_command_queue queue, cl_context context, int num_elements, MTdata d ) static int test_copy(cl_device_id device, cl_command_queue queue,
cl_context context, int num_elements, MTdata d)
{ {
clMemWrapper buffers[2]; clMemWrapper buffers[2];
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
@@ -76,10 +77,19 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
return TEST_FAIL; return TEST_FAIL;
} }
const bool has_immutable_memory_extension =
is_extension_available(device, "cl_ext_immutable_memory_objects");
for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{ {
for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++) for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
{ {
if (((flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
|| (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
&& !has_immutable_memory_extension)
{
continue;
}
log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]); log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
for (int i = 0; i < num_elements; i++) for (int i = 0; i < num_elements; i++)
@@ -89,7 +99,6 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
reference_ptr[i] = (int)genrand_int32(d); reference_ptr[i] = (int)genrand_int32(d);
} }
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
sizeof(cl_int) * num_elements, sizeof(cl_int) * num_elements,
@@ -116,7 +125,9 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
return TEST_FAIL; return TEST_FAIL;
} }
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) { if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)
&& !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
{
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
sizeof(cl_int) * num_elements, sizeof(cl_int) * num_elements,
reference_ptr.get(), 0, nullptr, reference_ptr.get(), 0, nullptr,
@@ -130,11 +141,44 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
err = clEnqueueCopyBuffer(queue, buffers[0], buffers[1], 0, 0, err = clEnqueueCopyBuffer(queue, buffers[0], buffers[1], 0, 0,
sizeof(cl_int) * num_elements, 0, nullptr, sizeof(cl_int) * num_elements, 0, nullptr,
nullptr); nullptr);
if ( err != CL_SUCCESS ){ if ((flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
{
if (err != CL_INVALID_OPERATION)
{
test_failure_error_ret(err, CL_INVALID_OPERATION,
"clEnqueueCopyBuffer should return "
"CL_INVALID_OPERATION when: "
"\"dst_buffer is created with "
"CL_MEM_IMMUTABLE_EXT flag\"",
TEST_FAIL);
return TEST_FAIL;
}
}
else if (err != CL_SUCCESS)
{
print_error(err, "clCopyArray failed\n"); print_error(err, "clCopyArray failed\n");
return TEST_FAIL; return TEST_FAIL;
} }
err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
sizeof(int) * num_elements, out_ptr.get(),
0, nullptr, nullptr);
if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
num_elements))
{
log_error("test failed\n");
return TEST_FAIL;
}
else
{
log_info("test passed\n");
}
// Reset out_ptr
for (int i = 0; i < num_elements; i++)
{
out_ptr[i] = (int)0xdeadbeef; // seed with incorrect data
}
err = clEnqueueReadBuffer(queue, buffers[1], true, 0, err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
sizeof(int) * num_elements, out_ptr.get(), sizeof(int) * num_elements, out_ptr.get(),
0, nullptr, nullptr); 0, nullptr, nullptr);
@@ -143,14 +187,20 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
return TEST_FAIL; return TEST_FAIL;
} }
if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(), int *target_buffer = reference_ptr.get();
num_elements)) if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{ {
log_error( " test failed\n" ); target_buffer = invalid_ptr.get();
}
if (verify_copy_buffer(target_buffer, out_ptr.get(), num_elements))
{
log_error("test failed\n");
return TEST_FAIL; return TEST_FAIL;
} }
else{ else
log_info( " test passed\n" ); {
log_info("test passed\n");
} }
} // dst flags } // dst flags
} // src flags } // src flags
@@ -160,7 +210,10 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
} // end test_copy() } // end test_copy()
static int testPartialCopy( cl_command_queue queue, cl_context context, int num_elements, cl_uint srcStart, cl_uint dstStart, int size, MTdata d ) static int testPartialCopy(cl_device_id device, cl_command_queue queue,
cl_context context, int num_elements,
cl_uint srcStart, cl_uint dstStart, int size,
MTdata d)
{ {
clMemWrapper buffers[2]; clMemWrapper buffers[2];
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
@@ -197,10 +250,19 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
return TEST_FAIL; return TEST_FAIL;
} }
const bool has_immutable_memory_extension =
is_extension_available(device, "cl_ext_immutable_memory_objects");
for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{ {
for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++) for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
{ {
if (((flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
|| (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
&& !has_immutable_memory_extension)
{
continue;
}
log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]); log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
for (int i = 0; i < num_elements; i++) for (int i = 0; i < num_elements; i++)
@@ -236,7 +298,9 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
return TEST_FAIL; return TEST_FAIL;
} }
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)){ if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)
&& !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
{
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
sizeof(cl_int) * num_elements, sizeof(cl_int) * num_elements,
reference_ptr.get(), 0, nullptr, reference_ptr.get(), 0, nullptr,
@@ -251,27 +315,72 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
queue, buffers[0], buffers[1], srcStart * sizeof(cl_int), queue, buffers[0], buffers[1], srcStart * sizeof(cl_int),
dstStart * sizeof(cl_int), sizeof(cl_int) * size, 0, nullptr, dstStart * sizeof(cl_int), sizeof(cl_int) * size, 0, nullptr,
nullptr); nullptr);
if ( err != CL_SUCCESS){ if ((flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
print_error(err, "clEnqueueCopyBuffer failed\n"); {
if (err != CL_INVALID_OPERATION)
{
test_failure_error_ret(err, CL_INVALID_OPERATION,
"clEnqueueCopyBuffer should return "
"CL_INVALID_OPERATION when: "
"\"dst_buffer is created with "
"CL_MEM_IMMUTABLE_EXT flag\"",
TEST_FAIL);
}
}
else if (err != CL_SUCCESS)
{
print_error(err, "clCopyArray failed\n");
return TEST_FAIL; return TEST_FAIL;
} }
err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
sizeof(int) * num_elements, out_ptr.get(),
0, nullptr, nullptr);
if (err != CL_SUCCESS)
{
print_error(err, "clEnqueueReadBuffer failed\n");
return TEST_FAIL;
}
if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
num_elements))
{
log_error("test failed\n");
return TEST_FAIL;
}
else
{
log_info("test passed\n");
}
// Reset out_ptr
for (int i = 0; i < num_elements; i++)
{
out_ptr[i] = (int)0xdeadbeef; // seed with incorrect data
}
err = clEnqueueReadBuffer(queue, buffers[1], true, 0, err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
sizeof(int) * num_elements, out_ptr.get(), sizeof(int) * num_elements, out_ptr.get(),
0, nullptr, nullptr); 0, nullptr, nullptr);
if ( err != CL_SUCCESS){ if (err != CL_SUCCESS)
{
print_error(err, "clEnqueueReadBuffer failed\n"); print_error(err, "clEnqueueReadBuffer failed\n");
return TEST_FAIL; return TEST_FAIL;
} }
if (verify_copy_buffer(reference_ptr.get() + srcStart, cl_int *target_buffer = reference_ptr.get() + srcStart;
out_ptr.get() + dstStart, size)) if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{ {
log_error("buffer_COPY test failed\n"); target_buffer = invalid_ptr.get();
}
if (verify_copy_buffer(target_buffer, out_ptr.get() + dstStart,
size))
{
log_error("test failed\n");
return TEST_FAIL; return TEST_FAIL;
} }
else{ else
log_info("buffer_COPY test passed\n"); {
log_info("test passed\n");
} }
} // dst mem flags } // dst mem flags
} // src mem flags } // src mem flags
@@ -289,7 +398,7 @@ REGISTER_TEST(buffer_copy)
// test the preset size // test the preset size
log_info( "set size: %d: ", num_elements ); log_info( "set size: %d: ", num_elements );
if (test_copy(queue, context, num_elements, d) != TEST_PASS) if (test_copy(device, queue, context, num_elements, d) != TEST_PASS)
{ {
err++; err++;
} }
@@ -298,7 +407,7 @@ REGISTER_TEST(buffer_copy)
for ( i = 0; i < 8; i++ ){ for ( i = 0; i < 8; i++ ){
size = (int)get_random_float(2.f,131072.f, d); size = (int)get_random_float(2.f,131072.f, d);
log_info( "random size: %d: ", size ); log_info( "random size: %d: ", size );
if (test_copy(queue, context, size, d) != TEST_PASS) if (test_copy(device, queue, context, size, d) != TEST_PASS)
{ {
err++; err++;
} }
@@ -324,8 +433,8 @@ REGISTER_TEST(buffer_partial_copy)
size = (int)get_random_float( 8.f, (float)(num_elements - srcStart), d ); size = (int)get_random_float( 8.f, (float)(num_elements - srcStart), d );
dstStart = (cl_uint)get_random_float( 0.f, (float)(num_elements - size), d ); dstStart = (cl_uint)get_random_float( 0.f, (float)(num_elements - size), d );
log_info( "random partial copy from %d to %d, size: %d: ", (int)srcStart, (int)dstStart, size ); log_info( "random partial copy from %d to %d, size: %d: ", (int)srcStart, (int)dstStart, size );
if (testPartialCopy(queue, context, num_elements, srcStart, dstStart, if (testPartialCopy(device, queue, context, num_elements, srcStart,
size, d) dstStart, size, d)
!= TEST_PASS) != TEST_PASS)
{ {
err++; err++;

View File

@@ -598,6 +598,12 @@ static int test_buffer_fill(cl_device_id deviceID, cl_context context,
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{ {
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clEventWrapper event[2]; clEventWrapper event[2];
clMemWrapper buffers[2]; clMemWrapper buffers[2];
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
@@ -721,6 +727,12 @@ REGISTER_TEST(buffer_fill_struct)
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{ {
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
log_info("Testing with cl_mem_flags: %s\n", log_info("Testing with cl_mem_flags: %s\n",

View File

@@ -592,6 +592,12 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{ {
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffer; clMemWrapper buffer;
outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment); outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
if ( ! outptr[i] ){ if ( ! outptr[i] ){
@@ -671,6 +677,101 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c
} // end test_buffer_map_read() } // end test_buffer_map_read()
int test_immutable_buffer_map(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements,
size_t size, const char *type, int loops)
{
REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
void *outptr[5];
cl_int err;
int i;
size_t ptrSizes[5];
int total_errors = 0;
MTdataHolder mtdata(gRandomSeed);
size_t min_alignment = get_min_alignment(context);
ptrSizes[0] = size;
ptrSizes[1] = ptrSizes[0] << 1;
ptrSizes[2] = ptrSizes[1] << 1;
ptrSizes[3] = ptrSizes[2] << 1;
ptrSizes[4] = ptrSizes[3] << 1;
// embedded devices don't support long/ulong so skip over
if (!gHasLong && strstr(type, "long")) return TEST_SKIPPED_ITSELF;
for (i = 0; i < loops; i++)
{
for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
// Testing writing from immutable flags
if (!(flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT))
{
continue;
}
clMemWrapper buffer;
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
if (!outptr[i])
{
log_error(" unable to allocate %d bytes of memory\n",
(int)ptrSizes[i] * num_elements);
return TEST_FAIL;
}
generate_random_data(kUChar, ptrSizes[i] * num_elements, mtdata,
outptr[i]);
buffer =
clCreateBuffer(context, flag_set[src_flag_id],
ptrSizes[i] * num_elements, outptr[i], &err);
if (nullptr == buffer || CL_SUCCESS != err)
{
print_error(err, "clCreateBuffer failed\n");
align_free(outptr[i]);
return TEST_FAIL;
}
void *mappedPtr = clEnqueueMapBuffer(
queue, buffer, CL_TRUE, CL_MAP_READ, 0,
ptrSizes[i] * num_elements, 0, nullptr, nullptr, &err);
if (err != CL_SUCCESS)
{
print_error(err, "clEnqueueMapBuffer failed");
align_free(outptr[i]);
return TEST_FAIL;
}
if (memcmp(mappedPtr, outptr[i], ptrSizes[i] * num_elements) != 0)
{
log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
1 << i, flag_set_names[src_flag_id]);
total_errors++;
}
else
{
log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
1 << i, flag_set_names[src_flag_id]);
}
err = clEnqueueUnmapMemObject(queue, buffer, mappedPtr, 0, nullptr,
nullptr);
test_error(err, "clEnqueueUnmapMemObject failed");
// If we are using the outptr[i] as backing via USE_HOST_PTR we need
// to make sure we are done before freeing.
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR))
{
err = clFinish(queue);
test_error(err, "clFinish failed");
}
align_free(outptr[i]);
}
} // cl_mem_flags
return total_errors > 0 ? TEST_FAIL : TEST_PASS;
}
#define DECLARE_LOCK_TEST(type, realType) \ #define DECLARE_LOCK_TEST(type, realType) \
REGISTER_TEST(buffer_map_read_##type) \ REGISTER_TEST(buffer_map_read_##type) \
@@ -691,6 +792,28 @@ DECLARE_LOCK_TEST(char, cl_char)
DECLARE_LOCK_TEST(uchar, cl_uchar) DECLARE_LOCK_TEST(uchar, cl_uchar)
DECLARE_LOCK_TEST(float, cl_float) DECLARE_LOCK_TEST(float, cl_float)
#undef DECLARE_LOCK_TEST
#define DECLARE_LOCK_TEST(type, realType) \
REGISTER_TEST(immutable_buffer_map_##type) \
{ \
return test_immutable_buffer_map(device, context, queue, num_elements, \
sizeof(realType), #type, 5); \
}
DECLARE_LOCK_TEST(int, cl_int)
DECLARE_LOCK_TEST(uint, cl_uint)
DECLARE_LOCK_TEST(long, cl_long)
DECLARE_LOCK_TEST(ulong, cl_ulong)
DECLARE_LOCK_TEST(short, cl_short)
DECLARE_LOCK_TEST(ushort, cl_ushort)
DECLARE_LOCK_TEST(char, cl_char)
DECLARE_LOCK_TEST(uchar, cl_uchar)
DECLARE_LOCK_TEST(float, cl_float)
#undef DECLARE_LOCK_TEST
REGISTER_TEST(buffer_map_read_struct) REGISTER_TEST(buffer_map_read_struct)
{ {
int (*foo)(void *,int); int (*foo)(void *,int);

View File

@@ -666,6 +666,12 @@ static int test_buffer_read(cl_device_id deviceID, cl_context context,
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{ {
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffer; clMemWrapper buffer;
outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment); outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
if ( ! outptr[i] ){ if ( ! outptr[i] ){
@@ -809,6 +815,12 @@ static int test_buffer_read_async(cl_device_id deviceID, cl_context context,
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{ {
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffer; clMemWrapper buffer;
clEventWrapper event; clEventWrapper event;
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment); outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
@@ -946,6 +958,12 @@ static int test_buffer_read_array_barrier(
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{ {
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffer; clMemWrapper buffer;
clEventWrapper event; clEventWrapper event;
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment); outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);

View File

@@ -660,8 +660,18 @@ static int test_buffer_write(cl_device_id deviceID, cl_context context,
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{ {
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++) for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
{ {
// Skip immutable memory flags
if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffers[2]; clMemWrapper buffers[2];
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
@@ -834,8 +844,19 @@ REGISTER_TEST(buffer_write_struct)
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{ {
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++) for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
{ {
// Skip immutable memory flags
if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
clMemWrapper buffers[2]; clMemWrapper buffers[2];
inptr[i] = (TestStruct *)align_malloc(ptrSizes[i] * num_elements, min_alignment); inptr[i] = (TestStruct *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
@@ -996,7 +1017,17 @@ static int test_buffer_write_array_async(
ptrSizes[4] = ptrSizes[3] << 1; ptrSizes[4] = ptrSizes[3] << 1;
for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) { for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
// Skip immutable memory flags
if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) { for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
// Skip immutable memory flags
if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
{
continue;
}
log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]); log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
loops = ( loops < 5 ? loops : 5 ); loops = ( loops < 5 ? loops : 5 );
@@ -1974,3 +2005,256 @@ REGISTER_TEST(buffer_write_async_ulong)
} // end test_buffer_ulong_write_array_async() } // end test_buffer_ulong_write_array_async()
// Verifies that data placed in an immutable buffer at creation time can be
// copied out by a kernel: for each immutable src flag combination and each
// non-immutable dst flag combination, a kernel copies src -> dst, the dst is
// read back, and the result is compared against the host source data.
//
// Arguments mirror the other test_buffer_write helpers in this file:
//   size       - element size in bytes of the base type
//   type       - type name, used for logging and the "half" special case
//   loops      - number of vector widths to test (clamped to 5)
//   inptr      - host source data, one allocation per vector width
//   kernelCode - kernel source per vector width
//   kernelName - kernel entry point per vector width
//   fn         - host verification callback (inptr, outptr, element count)
//   d          - random-number state (unused here; kept for signature parity
//                with the sibling helpers)
//
// Returns the number of failing sub-tests, or TEST_FAIL on a setup error.
int immutable_test_buffer_write(cl_device_id device, cl_context context,
                                cl_command_queue queue, int num_elements,
                                size_t size, const char *type, int loops,
                                void *inptr[5], const char *kernelCode[],
                                const char *kernelName[],
                                int (*fn)(void *, void *, int), MTdataHolder &d)
{
    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");

    void *outptr[5];
    clProgramWrapper program[5];
    clKernelWrapper kernel[5];
    size_t ptrSizes[5];
    size_t global_work_size[3];
    cl_int err;
    int i;
    int src_flag_id, dst_flag_id;
    int total_errors = 0;

    size_t min_alignment = get_min_alignment(context);

    global_work_size[0] = (size_t)num_elements;

    // Per-element sizes for vector widths 1, 2, 4, 8 and 16.
    ptrSizes[0] = size;
    ptrSizes[1] = ptrSizes[0] << 1;
    ptrSizes[2] = ptrSizes[1] << 1;
    ptrSizes[3] = ptrSizes[2] << 1;
    ptrSizes[4] = ptrSizes[3] << 1;

    loops = (loops < 5 ? loops : 5);
    for (i = 0; i < loops; i++)
    {
        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
                                          &kernelCode[i], kernelName[i]);
        if (err)
        {
            log_error(" Error creating program for %s\n", type);
            return TEST_FAIL;
        }

        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
        {
            // Only test reading from immutable source buffers here; the
            // non-immutable combinations are covered by the regular
            // buffer-write tests.
            if (!(flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT))
            {
                continue;
            }
            for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
            {
                // The destination is written by the kernel, so it must not
                // be immutable.
                if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
                {
                    continue;
                }
                cl_mem_flags src_mem_flags = flag_set[src_flag_id];
                cl_mem_flags dst_mem_flags = flag_set[dst_flag_id];

                clMemWrapper buffers[2];
                buffers[0] =
                    clCreateBuffer(context, src_mem_flags,
                                   ptrSizes[i] * num_elements, inptr[i], &err);
                if (nullptr == buffers[0] || CL_SUCCESS != err)
                {
                    // Bug fix: outptr[i] has not been allocated yet at this
                    // point, so the previous align_free(outptr[i]) here
                    // freed an uninitialized pointer.
                    print_error(err, " clCreateBuffer failed\n");
                    return TEST_FAIL;
                }

                if (!strcmp(type, "half"))
                {
                    // The half kernels expand each element, so the
                    // destination allocation is twice the source size.
                    outptr[i] = align_malloc(ptrSizes[i] * (num_elements * 2),
                                             min_alignment);
                    // NOTE(review): outptr[i] is passed as host_ptr even
                    // when dst_mem_flags has no host-pointer flag set;
                    // confirm whether the "half" path is ever exercised.
                    buffers[1] = clCreateBuffer(context, dst_mem_flags,
                                                ptrSizes[i] * 2 * num_elements,
                                                outptr[i], &err);
                }
                else
                {
                    outptr[i] =
                        align_malloc(ptrSizes[i] * num_elements, min_alignment);
                    // host_ptr may only be supplied when a host-pointer flag
                    // is present.
                    if ((dst_mem_flags & CL_MEM_USE_HOST_PTR)
                        || (dst_mem_flags & CL_MEM_COPY_HOST_PTR))
                        buffers[1] = clCreateBuffer(context, dst_mem_flags,
                                                    ptrSizes[i] * num_elements,
                                                    outptr[i], &err);
                    else
                        buffers[1] = clCreateBuffer(context, dst_mem_flags,
                                                    ptrSizes[i] * num_elements,
                                                    nullptr, &err);
                }
                if (err)
                {
                    align_free(outptr[i]);
                    print_error(err, " clCreateBuffer failed\n");
                    return TEST_FAIL;
                }

                err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
                                     (void *)&buffers[0]);
                err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
                                      (void *)&buffers[1]);
                if (err != CL_SUCCESS)
                {
                    align_free(outptr[i]);
                    print_error(err, " clSetKernelArg failed");
                    return TEST_FAIL;
                }

                err = clEnqueueNDRangeKernel(queue, kernel[i], 1, nullptr,
                                             global_work_size, nullptr, 0,
                                             nullptr, nullptr);
                if (err != CL_SUCCESS)
                {
                    print_error(err, " clEnqueueNDRangeKernel failed");
                    align_free(outptr[i]);
                    return TEST_FAIL;
                }

                // Blocking read so outptr[i] is valid for verification.
                err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
                                          ptrSizes[i] * num_elements, outptr[i],
                                          0, nullptr, nullptr);
                if (err != CL_SUCCESS)
                {
                    align_free(outptr[i]);
                    print_error(err, " clEnqueueReadBuffer failed");
                    return TEST_FAIL;
                }

                if (fn(inptr[i], outptr[i],
                       (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0])))
                {
                    log_error(
                        " %s%d test failed. cl_mem_flags src: %s, dst: %s\n",
                        type, 1 << i, flag_set_names[src_flag_id],
                        flag_set_names[dst_flag_id]);
                    total_errors++;
                }
                else
                {
                    log_info(
                        " %s%d test passed. cl_mem_flags src: %s, dst: %s\n",
                        type, 1 << i, flag_set_names[src_flag_id],
                        flag_set_names[dst_flag_id]);
                }
                // cleanup
                align_free(outptr[i]);
            } // dst cl_mem_flag
        } // src cl_mem_flag
    }

    return total_errors;
} // end immutable_test_buffer_write()
// Regression test: random int data baked into an immutable buffer at
// creation time can be copied by a kernel into a writable buffer and read
// back intact, for vector widths 1 through 16.
REGISTER_TEST(write_from_immutable_buffer_to_buffer)
{
    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");

    // One copy kernel per vector width; the source is in the constant
    // address space since immutable buffers are read-only to the device.
    static const char *immutable_buffer_write_int_kernel_code[] = {
        R"(
__kernel void test_buffer_write_int(constant int *src, __global int *dst)
{
    int tid = get_global_id(0);
    dst[tid] = src[tid];
})",
        R"(
__kernel void test_buffer_write_int2(constant int2 *src, __global int2 *dst)
{
    int tid = get_global_id(0);
    dst[tid] = src[tid];
})",
        R"(
__kernel void test_buffer_write_int4(constant int4 *src, __global int4 *dst)
{
    int tid = get_global_id(0);
    dst[tid] = src[tid];
})",
        R"(
__kernel void test_buffer_write_int8(constant int8 *src, __global int8 *dst)
{
    int tid = get_global_id(0);
    dst[tid] = src[tid];
})",
        R"(
__kernel void test_buffer_write_int16(constant int16 *src, __global int16 *dst)
{
    int tid = get_global_id(0);
    dst[tid] = src[tid];
})"
    };

    static const char *immutable_int_kernel_name[] = {
        "test_buffer_write_int", "test_buffer_write_int2",
        "test_buffer_write_int4", "test_buffer_write_int8",
        "test_buffer_write_int16"
    };

    // Mapping an immutable buffer for writing is not legal, so the mapped
    // variant of this test cannot run.
    if (gTestMap)
    {
        log_error("Immutable buffers cannot be mapped with CL_MEM_WRITE\n");
        return TEST_SKIPPED_ITSELF;
    }

    MTdataHolder mtData(gRandomSeed);
    const size_t alignment = get_min_alignment(context);

    // Element sizes for int, int2, int4, int8 and int16.
    size_t elemSizes[5];
    elemSizes[0] = sizeof(cl_int);
    for (int w = 1; w < 5; w++) elemSizes[w] = elemSizes[w - 1] << 1;

    // Fill one block of random host data per vector width.
    int *hostSrc[5];
    for (int w = 0; w < 5; w++)
    {
        hostSrc[w] =
            (int *)align_malloc(elemSizes[w] * num_elements, alignment);
        size_t count = elemSizes[w] * num_elements / elemSizes[0];
        for (size_t n = 0; n < count; n++)
            hostSrc[w][n] = (int)genrand_int32(mtData);
    }

    int result = immutable_test_buffer_write(
        device, context, queue, num_elements, sizeof(cl_int), "int", 5,
        (void **)hostSrc, immutable_buffer_write_int_kernel_code,
        immutable_int_kernel_name, verify_write_int, mtData);

    for (int w = 0; w < 5; w++)
    {
        align_free((void *)hostSrc[w]);
    }

    return result;
}

View File

@@ -194,9 +194,9 @@ template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;
template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;} template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;} template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; } template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; }
template <> cl_half AtomicTypeExtendedInfo<cl_half>::MinValue() template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MinValue()
{ {
return cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode); return -CL_HALF_MAX;
} }
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue() template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
{ {
@@ -217,9 +217,9 @@ template <> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue()
} }
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;} template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;} template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
template <> cl_half AtomicTypeExtendedInfo<cl_half>::MaxValue() template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MaxValue()
{ {
return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode); return CL_HALF_MAX;
} }
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue() template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
{ {

View File

@@ -22,12 +22,14 @@
#include "host_atomics.h" #include "host_atomics.h"
#include "CL/cl_half.h" #include <algorithm>
#include <iomanip> #include <iomanip>
#include <limits>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
#include "CL/cl_half.h"
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads) #define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
#define MAX_HOST_THREADS GetThreadCount() #define MAX_HOST_THREADS GetThreadCount()
@@ -76,6 +78,10 @@ extern int
extern cl_device_atomic_capabilities gAtomicMemCap, extern cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device gAtomicFenceCap; // atomic memory and fence capabilities for this device
extern cl_device_fp_config gDoubleFPConfig;
extern cl_device_fp_config gFloatFPConfig;
extern cl_device_fp_config gHalfFPConfig;
extern cl_half_rounding_mode gHalfRoundingMode; extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported; extern bool gFloatAtomicsSupported;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps; extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
@@ -91,6 +97,37 @@ extern cl_int getSupportedMemoryOrdersAndScopes(
cl_device_id device, std::vector<TExplicitMemoryOrderType> &memoryOrders, cl_device_id device, std::vector<TExplicitMemoryOrderType> &memoryOrders,
std::vector<TExplicitMemoryScopeType> &memoryScopes); std::vector<TExplicitMemoryScopeType> &memoryScopes);
// Type-punning helper: view a float's bits as a 32-bit unsigned integer.
// NOTE(review): reading the inactive union member is well-defined in C but
// formally UB in C++; all major compilers support it, though std::memcpy
// would be the strictly conforming alternative.
union FloatIntUnion {
    float f;
    uint32_t i;
};

// Returns true iff `value` is a quiet NaN.  For float, the quiet bit
// (mantissa MSB) is inspected directly; for other types this falls back to
// std::isnan, which does not distinguish quiet from signaling NaNs.
template <typename HostDataType> bool is_qnan(const HostDataType &value)
{
    if constexpr (std::is_same_v<HostDataType, float>)
    {
        FloatIntUnion u;
        u.f = value;
        if ((u.i & 0x7F800000) != 0x7F800000) return false;
        // Exponent is all ones: a set quiet bit means qNaN.  (A set quiet
        // bit also implies a non-zero mantissa, so infinity is excluded.)
        return (u.i & 0x00400000) != 0;
    }
    else
        return std::isnan(value);
}

// Returns true iff `value` is a signaling NaN.  For float, the value must
// have an all-ones exponent, a non-zero mantissa and a clear quiet bit.
// For other types this falls back to std::isnan, which does not distinguish
// quiet from signaling NaNs.
template <typename HostDataType> bool is_snan(const HostDataType &value)
{
    if constexpr (std::is_same_v<HostDataType, float>)
    {
        FloatIntUnion u;
        u.f = value;
        if ((u.i & 0x7F800000) != 0x7F800000) return false;
        // Bug fix: an all-ones exponent with a zero mantissa is +/-infinity,
        // which the previous code misclassified as a signaling NaN.
        if ((u.i & 0x007FFFFF) == 0) return false;
        return (u.i & 0x00400000) == 0;
    }
    else
        return std::isnan(value);
}
class AtomicTypeInfo { class AtomicTypeInfo {
public: public:
TExplicitAtomicType _type; TExplicitAtomicType _type;
@@ -154,12 +191,12 @@ public:
return 0; return 0;
} }
CBasicTest(TExplicitAtomicType dataType, bool useSVM) CBasicTest(TExplicitAtomicType dataType, bool useSVM)
: CTest(), _maxDeviceThreads(MAX_DEVICE_THREADS), _dataType(dataType), : CTest(), _dataType(dataType), _useSVM(useSVM), _startValue(255),
_useSVM(useSVM), _startValue(255), _localMemory(false), _localMemory(false), _declaredInProgram(false),
_declaredInProgram(false), _usedInFunction(false), _usedInFunction(false), _genericAddrSpace(false),
_genericAddrSpace(false), _oldValueCheck(true), _oldValueCheck(true), _localRefValues(false), _maxGroupSize(0),
_localRefValues(false), _maxGroupSize(0), _passCount(0), _passCount(0), _iterations(gInternalIterations),
_iterations(gInternalIterations) _maxDeviceThreads(MAX_DEVICE_THREADS), _deviceThreads(0)
{} {}
virtual ~CBasicTest() virtual ~CBasicTest()
{ {
@@ -178,12 +215,15 @@ public:
{ {
return false; return false;
} }
virtual bool virtual bool
IsTestNotAsExpected(const HostDataType &expected, IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues, const std::vector<HostAtomicType> &testValues,
const std::vector<HostDataType> &startRefValues,
cl_uint whichDestValue) cl_uint whichDestValue)
{ {
return expected != testValues[whichDestValue]; return expected
!= static_cast<HostDataType>(testValues[whichDestValue]);
} }
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) MTdata d)
@@ -239,12 +279,12 @@ public:
cl_command_queue queue) cl_command_queue queue)
{ {
int error = 0; int error = 0;
DeclaredInProgram(false); SetDeclaredInProgram(false);
EXECUTE_TEST(error, EXECUTE_TEST(error,
ExecuteForEachPointerType(deviceID, context, queue)); ExecuteForEachPointerType(deviceID, context, queue));
if (!UseSVM()) if (!UseSVM())
{ {
DeclaredInProgram(true); SetDeclaredInProgram(true);
EXECUTE_TEST(error, EXECUTE_TEST(error,
ExecuteForEachPointerType(deviceID, context, queue)); ExecuteForEachPointerType(deviceID, context, queue));
} }
@@ -255,13 +295,13 @@ public:
cl_command_queue queue) cl_command_queue queue)
{ {
int error = 0; int error = 0;
if (_maxDeviceThreads > 0 && !UseSVM()) if (_deviceThreads > 0 && !UseSVM())
{ {
SetLocalMemory(true); SetLocalMemory(true);
EXECUTE_TEST( EXECUTE_TEST(
error, ExecuteForEachDeclarationType(deviceID, context, queue)); error, ExecuteForEachDeclarationType(deviceID, context, queue));
} }
if (_maxDeviceThreads + MaxHostThreads() > 0) if (_deviceThreads + MaxHostThreads() > 0)
{ {
SetLocalMemory(false); SetLocalMemory(false);
EXECUTE_TEST( EXECUTE_TEST(
@@ -270,7 +310,7 @@ public:
return error; return error;
} }
virtual int Execute(cl_device_id deviceID, cl_context context, virtual int Execute(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements) cl_command_queue queue, int num_elements) override
{ {
if (sizeof(HostAtomicType) != DataType().Size(deviceID)) if (sizeof(HostAtomicType) != DataType().Size(deviceID))
{ {
@@ -310,7 +350,12 @@ public:
if (UseSVM()) return 0; if (UseSVM()) return 0;
_maxDeviceThreads = 0; _maxDeviceThreads = 0;
} }
if (_maxDeviceThreads + MaxHostThreads() == 0) return 0;
_deviceThreads = (num_elements > 0)
? std::min(cl_uint(num_elements), _maxDeviceThreads)
: _maxDeviceThreads;
if (_deviceThreads + MaxHostThreads() == 0) return 0;
return ExecuteForEachParameterSet(deviceID, context, queue); return ExecuteForEachParameterSet(deviceID, context, queue);
} }
virtual void HostFunction(cl_uint tid, cl_uint threadCount, virtual void HostFunction(cl_uint tid, cl_uint threadCount,
@@ -323,7 +368,7 @@ public:
{ {
return AtomicTypeExtendedInfo<HostDataType>(_dataType); return AtomicTypeExtendedInfo<HostDataType>(_dataType);
} }
cl_uint _maxDeviceThreads;
virtual cl_uint MaxHostThreads() virtual cl_uint MaxHostThreads()
{ {
if (UseSVM() || gHost) if (UseSVM() || gHost)
@@ -420,7 +465,7 @@ public:
HostDataType StartValue() { return _startValue; } HostDataType StartValue() { return _startValue; }
void SetLocalMemory(bool local) { _localMemory = local; } void SetLocalMemory(bool local) { _localMemory = local; }
bool LocalMemory() { return _localMemory; } bool LocalMemory() { return _localMemory; }
void DeclaredInProgram(bool declaredInProgram) void SetDeclaredInProgram(bool declaredInProgram)
{ {
_declaredInProgram = declaredInProgram; _declaredInProgram = declaredInProgram;
} }
@@ -477,6 +522,8 @@ private:
cl_uint _currentGroupSize; cl_uint _currentGroupSize;
cl_uint _passCount; cl_uint _passCount;
const cl_int _iterations; const cl_int _iterations;
cl_uint _maxDeviceThreads;
cl_uint _deviceThreads;
}; };
template <typename HostAtomicType, typename HostDataType> template <typename HostAtomicType, typename HostDataType>
@@ -702,6 +749,28 @@ public:
cl_context context, cl_context context,
cl_command_queue queue) cl_command_queue queue)
{ {
// Comparator for orders and scopes.
const auto checkValidity = [](TExplicitMemoryOrderType success,
TExplicitMemoryOrderType failure,
TExplicitMemoryScopeType scope) {
// Both memory order arguments must be set (or neither).
if ((success == MEMORY_ORDER_EMPTY || failure == MEMORY_ORDER_EMPTY)
&& success != failure)
return false;
// Memory scope without memory order is disallowed.
if (success == MEMORY_ORDER_EMPTY && scope != MEMORY_SCOPE_EMPTY)
return false;
// Failure must not be release or acq_rel.
if (failure == MEMORY_ORDER_RELEASE
|| failure == MEMORY_ORDER_ACQ_REL)
return false;
// Failure must not be stronger than success.
return failure <= success;
};
// repeat test for each reasonable memory order/scope combination // repeat test for each reasonable memory order/scope combination
std::vector<TExplicitMemoryOrderType> memoryOrder; std::vector<TExplicitMemoryOrderType> memoryOrder;
std::vector<TExplicitMemoryScopeType> memoryScope; std::vector<TExplicitMemoryScopeType> memoryScope;
@@ -719,16 +788,10 @@ public:
{ {
for (unsigned si = 0; si < memoryScope.size(); si++) for (unsigned si = 0; si < memoryScope.size(); si++)
{ {
if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY if (!checkValidity(memoryOrder[oi], memoryOrder[o2i],
|| memoryOrder[o2i] == MEMORY_ORDER_EMPTY) memoryScope[si]))
&& memoryOrder[oi] != memoryOrder[o2i]) continue;
continue; // both memory order arguments must be set (or
// none)
if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY
|| memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
&& memoryScope[si] != MEMORY_SCOPE_EMPTY)
continue; // memory scope without memory order is not
// allowed
MemoryOrder(memoryOrder[oi]); MemoryOrder(memoryOrder[oi]);
MemoryOrder2(memoryOrder[o2i]); MemoryOrder2(memoryOrder[o2i]);
MemoryScope(memoryScope[si]); MemoryScope(memoryScope[si]);
@@ -895,12 +958,25 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
+ ss.str() + "] = {\n"; + ss.str() + "] = {\n";
ss.str(""); ss.str("");
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type if constexpr (
== TYPE_ATOMIC_FLOAT) std::is_same_v<
ss << std::setprecision(10) << _startValue; HostDataType,
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
== TYPE_ATOMIC_HALF) {
ss << cl_half_to_float(static_cast<cl_half>(_startValue)); if (std::isinf(_startValue))
ss << (_startValue < 0 ? "-" : "") << "INFINITY";
else if (std::isnan(_startValue))
ss << "0.0 / 0.0";
else
ss << std::setprecision(
std::numeric_limits<HostDataType>::max_digits10)
<< _startValue;
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
ss << std::setprecision(std::numeric_limits<float>::max_digits10)
<< cl_half_to_float(_startValue);
}
else else
ss << _startValue; ss << _startValue;
@@ -1137,7 +1213,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
MTdata d; MTdata d;
size_t typeSize = DataType().Size(deviceID); size_t typeSize = DataType().Size(deviceID);
deviceThreadCount = _maxDeviceThreads; deviceThreadCount = _deviceThreads;
hostThreadCount = MaxHostThreads(); hostThreadCount = MaxHostThreads();
threadCount = deviceThreadCount + hostThreadCount; threadCount = deviceThreadCount + hostThreadCount;
@@ -1202,9 +1278,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems) programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems)
+ FunctionCode() + KernelCode(numDestItems); + FunctionCode() + KernelCode(numDestItems);
programLine = programSource.c_str(); programLine = programSource.c_str();
if (create_single_kernel_helper_with_build_options( if (create_single_kernel_helper(context, &program, &kernel, 1,
context, &program, &kernel, 1, &programLine, &programLine, "test_atomic_kernel"))
"test_atomic_kernel", gOldAPI ? "" : nullptr))
{ {
return -1; return -1;
} }
@@ -1289,7 +1364,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
numDestItems = NumResults(threadCount, deviceID); numDestItems = NumResults(threadCount, deviceID);
destItems.resize(numDestItems); destItems.resize(numDestItems);
for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue; for (cl_uint i = 0; i < numDestItems; i++)
destItems[i] = static_cast<HostAtomicType>(_startValue);
// Create main buffer with atomic variables (array size dependent on // Create main buffer with atomic variables (array size dependent on
// particular test) // particular test)
@@ -1462,12 +1538,13 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
startRefValues.size() ? &startRefValues[0] : 0, i)) startRefValues.size() ? &startRefValues[0] : 0, i))
break; // no expected value function provided break; // no expected value function provided
if (IsTestNotAsExpected(expected, destItems, i)) if (IsTestNotAsExpected(expected, destItems, startRefValues, i))
{ {
std::stringstream logLine; std::stringstream logLine;
logLine << "ERROR: Result " << i logLine << "ERROR: Result " << i
<< " from kernel does not validate! (should be " << expected << " from kernel does not validate! (should be " << expected
<< ", was " << destItems[i] << ")\n"; << ", was " << static_cast<HostDataType>(destItems[i])
<< ")\n";
log_error("%s", logLine.str().c_str()); log_error("%s", logLine.str().c_str());
for (i = 0; i < threadCount; i++) for (i = 0; i < threadCount; i++)
{ {
@@ -1534,7 +1611,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
// clEnqueueNDRangeKernel // clEnqueueNDRangeKernel
{ {
/* Re-write the starting value */ /* Re-write the starting value */
for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue; for (size_t i = 0; i < numDestItems; i++)
destItems[i] = static_cast<HostAtomicType>(_startValue);
refValues[0] = 0; refValues[0] = 0;
if (deviceThreadCount > 0) if (deviceThreadCount > 0)
{ {

View File

@@ -18,12 +18,15 @@
#include "harness/testHarness.h" #include "harness/testHarness.h"
#include <mutex> #include <mutex>
#include "CL/cl_half.h" #include "CL/cl_half.h"
#ifdef WIN32 #ifdef WIN32
#include "Windows.h" #include "Windows.h"
#endif #endif
extern cl_half_rounding_mode gHalfRoundingMode;
//flag for test verification (good test should discover non-atomic functions and fail) //flag for test verification (good test should discover non-atomic functions and fail)
//#define NON_ATOMIC_FUNCTIONS //#define NON_ATOMIC_FUNCTIONS
@@ -37,6 +40,93 @@ enum TExplicitMemoryOrderType
MEMORY_ORDER_SEQ_CST MEMORY_ORDER_SEQ_CST
}; };
// Wrapper class for half-precision
//
// Stores a cl_half bit pattern and performs all arithmetic by converting to
// float, operating in single precision, and rounding back with the global
// gHalfRoundingMode.  Constructors from arithmetic types convert *values*;
// the cl_half constructor/conversion pair transports the raw *bit
// representation*.
class HostHalf {
public:
    // Convert from semantic values
    HostHalf(cl_uint value = 0)
        : value(
            cl_half_from_float(static_cast<float>(value), gHalfRoundingMode))
    {}
    HostHalf(int value): HostHalf(static_cast<cl_uint>(value)) {}
    HostHalf(float value): value(cl_half_from_float(value, gHalfRoundingMode))
    {}
    HostHalf(double value): HostHalf(static_cast<float>(value)) {}

    // Convert to semantic values
    // NOTE(review): casting a negative half to cl_uint is undefined
    // behavior; this presumably is only used for non-negative values -
    // confirm against callers.
    operator cl_uint() const
    {
        return static_cast<cl_uint>(cl_half_to_float(value));
    }
    operator float() const { return cl_half_to_float(value); }
    operator double() const
    {
        return static_cast<double>(cl_half_to_float(value));
    }

    // Construct from bit representation
    HostHalf(cl_half value): value(value) {}

    // Get the underlying bit representation
    operator cl_half() const { return value; }

    // Negation converts to float, flips the sign, and re-rounds to half.
    HostHalf operator-() const
    {
        return HostHalf(
            cl_half_from_float(-cl_half_to_float(value), gHalfRoundingMode));
    }

// Comparisons and binary arithmetic are carried out in single precision;
// arithmetic results are narrowed back to half via the HostHalf(float)
// constructor.
#define GENERIC_OP(RetType, op) \
    RetType operator op(const HostHalf &other) const \
    { \
        return RetType(cl_half_to_float(value) \
                           op cl_half_to_float(other.value)); \
    }
    GENERIC_OP(bool, ==)
    GENERIC_OP(bool, !=)
    GENERIC_OP(bool, <)
    GENERIC_OP(bool, <=)
    GENERIC_OP(bool, >)
    GENERIC_OP(bool, >=)
    GENERIC_OP(HostHalf, +)
    GENERIC_OP(HostHalf, -)
    GENERIC_OP(HostHalf, *)
    GENERIC_OP(HostHalf, /)
#undef GENERIC_OP

// Compound assignments compute in single precision and round the result
// back to half with gHalfRoundingMode.
#define INPLACE_OP(op) \
    HostHalf &operator op##=(const HostHalf &other) \
    { \
        value = cl_half_from_float(cl_half_to_float(value) \
                                       op cl_half_to_float(other.value), \
                                   gHalfRoundingMode); \
        return *this; \
    }
    INPLACE_OP(+)
    INPLACE_OP(-)
    INPLACE_OP(*)
    INPLACE_OP(/)
#undef INPLACE_OP

    // Stream output prints the equivalent float value.
    friend std::ostream &operator<<(std::ostream &os, const HostHalf &hh)
    {
        float f = cl_half_to_float(hh.value);
        os << f;
        return os;
    }

private:
    // Raw IEEE-754 binary16 bit pattern.
    cl_half value;
};
namespace std {
// Absolute value for HostHalf: negate only when the value is below zero.
// NOTE(review): adding overloads to namespace std is formally not permitted
// by the standard, though it works on the supported toolchains.
inline HostHalf abs(const HostHalf &value)
{
    if (value < HostHalf(0))
    {
        return -value;
    }
    return value;
}
} // namespace std
// host atomic types (applicable for atomic functions supported on host OS) // host atomic types (applicable for atomic functions supported on host OS)
#ifdef WIN32 #ifdef WIN32
#define HOST_ATOMIC_INT unsigned long #define HOST_ATOMIC_INT unsigned long
@@ -73,7 +163,7 @@ enum TExplicitMemoryOrderType
#define HOST_UINT cl_uint #define HOST_UINT cl_uint
#define HOST_LONG cl_long #define HOST_LONG cl_long
#define HOST_ULONG cl_ulong #define HOST_ULONG cl_ulong
#define HOST_HALF cl_half #define HOST_HALF HostHalf
#define HOST_FLOAT cl_float #define HOST_FLOAT cl_float
#define HOST_DOUBLE cl_double #define HOST_DOUBLE cl_double
@@ -91,6 +181,18 @@ enum TExplicitMemoryOrderType
extern cl_half_rounding_mode gHalfRoundingMode; extern cl_half_rounding_mode gHalfRoundingMode;
// True when HostAtomicType is one of the floating-point host atomic
// representations (half, float or double).
template <typename HostAtomicType>
constexpr bool is_host_atomic_fp_v =
    std::is_same_v<HostAtomicType, HOST_ATOMIC_HALF>
    || std::is_same_v<HostAtomicType, HOST_ATOMIC_FLOAT>
    || std::is_same_v<HostAtomicType, HOST_ATOMIC_DOUBLE>;

// True when HostDataType is one of the floating-point host data types
// (half, float or double).
template <typename HostDataType>
constexpr bool is_host_fp_v =
    std::is_same_v<HostDataType, HOST_HALF>
    || std::is_same_v<HostDataType, HOST_FLOAT>
    || std::is_same_v<HostDataType, HOST_DOUBLE>;
// host atomic functions // host atomic functions
void host_atomic_thread_fence(TExplicitMemoryOrderType order); void host_atomic_thread_fence(TExplicitMemoryOrderType order);
@@ -98,24 +200,13 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c, CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order) TExplicitMemoryOrderType order)
{ {
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>) if constexpr (is_host_atomic_fp_v<AtomicType>)
{ {
static std::mutex mx; static std::mutex mx;
std::lock_guard<std::mutex> lock(mx); std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a; CorrespondingType old_value = *a;
*a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)), CorrespondingType new_value = old_value + c;
gHalfRoundingMode); *a = static_cast<AtomicType>(new_value);
return old_value;
}
else if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a += c;
return old_value; return old_value;
} }
else else
@@ -135,21 +226,13 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order) TExplicitMemoryOrderType order)
{ {
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>) if constexpr (is_host_atomic_fp_v<AtomicType>)
{ {
static std::mutex mx; static std::mutex mx;
std::lock_guard<std::mutex> lock(mx); std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a; CorrespondingType old_value = *a;
*a -= c; CorrespondingType new_value = old_value - c;
return old_value; *a = static_cast<AtomicType>(new_value);
}
else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
gHalfRoundingMode);
return old_value; return old_value;
} }
else else
@@ -170,12 +253,14 @@ CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType
TExplicitMemoryOrderType order) TExplicitMemoryOrderType order)
{ {
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
if (sizeof(CorrespondingType) == 2) if constexpr (sizeof(CorrespondingType) == 2)
return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a), c); return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a),
*reinterpret_cast<SHORT *>(&c));
else else
return InterlockedExchange(reinterpret_cast<volatile LONG *>(a), c); return InterlockedExchange(reinterpret_cast<volatile LONG *>(a),
*reinterpret_cast<LONG *>(&c));
#elif defined(__GNUC__) #elif defined(__GNUC__)
return __sync_lock_test_and_set(a, c); return __sync_lock_test_and_set(a, *reinterpret_cast<AtomicType *>(&c));
#else #else
log_info("Host function not implemented: atomic_exchange\n"); log_info("Host function not implemented: atomic_exchange\n");
return 0; return 0;
@@ -192,30 +277,14 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
TExplicitMemoryOrderType order_failure) TExplicitMemoryOrderType order_failure)
{ {
CorrespondingType tmp; CorrespondingType tmp;
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>) if constexpr (is_host_atomic_fp_v<AtomicType>)
{ {
static std::mutex mtx; static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx); std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile cl_half *>(a); tmp = static_cast<CorrespondingType>(*a);
if (cl_half_to_float(tmp) == cl_half_to_float(*expected))
{
*reinterpret_cast<volatile cl_half *>(a) = desired;
return true;
}
*expected = tmp;
}
else if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile float *>(a);
if (tmp == *expected) if (tmp == *expected)
{ {
*a = desired; *a = static_cast<AtomicType>(desired);
return true; return true;
} }
*expected = tmp; *expected = tmp;
@@ -241,8 +310,8 @@ CorrespondingType host_atomic_load(volatile AtomicType *a,
TExplicitMemoryOrderType order) TExplicitMemoryOrderType order)
{ {
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
if (sizeof(CorrespondingType) == 2) if constexpr (sizeof(CorrespondingType) == 2)
auto prev = InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0); return InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
else else
return InterlockedExchangeAdd(reinterpret_cast<volatile LONG *>(a), 0); return InterlockedExchangeAdd(reinterpret_cast<volatile LONG *>(a), 0);
#elif defined(__GNUC__) #elif defined(__GNUC__)

View File

@@ -31,11 +31,14 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio
int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
cl_device_atomic_capabilities gAtomicMemCap, cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device gAtomicFenceCap; // atomic memory and fence capabilities for this device
cl_device_fp_config gDoubleFPConfig = 0;
cl_device_fp_config gFloatFPConfig = 0;
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
bool gFloatAtomicsSupported = false; bool gFloatAtomicsSupported = false;
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
cl_device_fp_config gHalfFPConfig = 0;
test_status InitCL(cl_device_id device) { test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device); auto version = get_device_cl_version(device);
@@ -134,13 +137,17 @@ test_status InitCL(cl_device_id device) {
if (is_extension_available(device, "cl_ext_float_atomics")) if (is_extension_available(device, "cl_ext_float_atomics"))
{ {
gFloatAtomicsSupported = true; gFloatAtomicsSupported = true;
if (is_extension_available(device, "cl_khr_fp64")) if (is_extension_available(device, "cl_khr_fp64"))
{ {
cl_int error = clGetDeviceInfo( cl_int error = clGetDeviceInfo(
device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT, device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr); sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
sizeof(gDoubleFPConfig), &gDoubleFPConfig,
NULL);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
} }
cl_int error = clGetDeviceInfo( cl_int error = clGetDeviceInfo(
@@ -148,6 +155,13 @@ test_status InitCL(cl_device_id device) {
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr); sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
sizeof(gFloatFPConfig), &gFloatFPConfig, NULL);
test_error_ret(
error,
"Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)",
TEST_FAIL);
if (is_extension_available(device, "cl_khr_fp16")) if (is_extension_available(device, "cl_khr_fp16"))
{ {
cl_int error = clGetDeviceInfo( cl_int error = clGetDeviceInfo(
@@ -170,6 +184,11 @@ test_status InitCL(cl_device_id device) {
log_error("Error while acquiring half rounding mode\n"); log_error("Error while acquiring half rounding mode\n");
return TEST_FAIL; return TEST_FAIL;
} }
error =
clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG,
sizeof(gHalfFPConfig), &gHalfFPConfig, NULL);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
} }
} }

File diff suppressed because it is too large Load Diff

View File

@@ -26,9 +26,9 @@ class DirectXWrapper {
public: public:
DirectXWrapper(); DirectXWrapper();
ID3D12Device* getDXDevice() const; [[nodiscard]] ID3D12Device* getDXDevice() const;
ID3D12CommandQueue* getDXCommandQueue() const; [[nodiscard]] ID3D12CommandQueue* getDXCommandQueue() const;
ID3D12CommandAllocator* getDXCommandAllocator() const; [[nodiscard]] ID3D12CommandAllocator* getDXCommandAllocator() const;
protected: protected:
ComPtr<ID3D12Device> dx_device = nullptr; ComPtr<ID3D12Device> dx_device = nullptr;
@@ -39,7 +39,7 @@ protected:
class DirectXFenceWrapper { class DirectXFenceWrapper {
public: public:
DirectXFenceWrapper(ID3D12Device* dx_device); DirectXFenceWrapper(ID3D12Device* dx_device);
ID3D12Fence* operator*() const { return dx_fence.Get(); } [[nodiscard]] ID3D12Fence* get() const { return dx_fence.Get(); }
private: private:
ComPtr<ID3D12Fence> dx_fence = nullptr; ComPtr<ID3D12Fence> dx_fence = nullptr;

View File

@@ -104,7 +104,8 @@
VK_FUNC_DECL(vkGetImageSubresourceLayout) \ VK_FUNC_DECL(vkGetImageSubresourceLayout) \
VK_FUNC_DECL(vkCreateDebugUtilsMessengerEXT) \ VK_FUNC_DECL(vkCreateDebugUtilsMessengerEXT) \
VK_FUNC_DECL(vkDestroyDebugUtilsMessengerEXT) \ VK_FUNC_DECL(vkDestroyDebugUtilsMessengerEXT) \
VK_FUNC_DECL(vkGetPhysicalDeviceExternalBufferProperties) VK_FUNC_DECL(vkGetPhysicalDeviceExternalBufferProperties) \
VK_FUNC_DECL(vkGetPhysicalDeviceFeatures2)
#define VK_WINDOWS_FUNC_LIST \ #define VK_WINDOWS_FUNC_LIST \
VK_FUNC_DECL(vkGetMemoryWin32HandleKHR) \ VK_FUNC_DECL(vkGetMemoryWin32HandleKHR) \
VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR) \ VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR) \
@@ -209,5 +210,6 @@
#define vkDestroyDebugUtilsMessengerEXT _vkDestroyDebugUtilsMessengerEXT #define vkDestroyDebugUtilsMessengerEXT _vkDestroyDebugUtilsMessengerEXT
#define vkGetPhysicalDeviceExternalBufferProperties \ #define vkGetPhysicalDeviceExternalBufferProperties \
_vkGetPhysicalDeviceExternalBufferProperties _vkGetPhysicalDeviceExternalBufferProperties
#define vkGetPhysicalDeviceFeatures2 _vkGetPhysicalDeviceFeatures2
#endif //_vulkan_api_list_hpp_ #endif //_vulkan_api_list_hpp_

View File

@@ -147,6 +147,7 @@ VulkanInstance::VulkanInstance(bool useValidationLayers)
// return WAIVED; // return WAIVED;
} }
VK_GET_NULL_INSTANCE_PROC_ADDR(vkGetPhysicalDeviceFeatures2);
VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceVersion); VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceVersion);
VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceLayerProperties); VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceLayerProperties);
VK_GET_NULL_INSTANCE_PROC_ADDR(vkCreateInstance); VK_GET_NULL_INSTANCE_PROC_ADDR(vkCreateInstance);
@@ -612,7 +613,8 @@ VulkanDevice::VulkanDevice(const VulkanDevice &device)
VulkanDevice::VulkanDevice( VulkanDevice::VulkanDevice(
const VulkanPhysicalDevice &physicalDevice, const VulkanPhysicalDevice &physicalDevice,
const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap) const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap,
bool useShaderInt8)
: m_physicalDevice(physicalDevice), m_vkDevice(NULL) : m_physicalDevice(physicalDevice), m_vkDevice(NULL)
{ {
uint32_t maxQueueCount = 0; uint32_t maxQueueCount = 0;
@@ -676,7 +678,55 @@ VulkanDevice::VulkanDevice(
enabledExtensionNameList.data(); enabledExtensionNameList.data();
vkDeviceCreateInfo.pEnabledFeatures = NULL; vkDeviceCreateInfo.pEnabledFeatures = NULL;
if (useShaderInt8)
{
VkPhysicalDeviceShaderFloat16Int8Features int8Features{};
int8Features.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
VkPhysicalDevice8BitStorageFeatures storage8Features{};
storage8Features.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
int8Features.pNext = &storage8Features;
VkPhysicalDeviceFeatures2 features2{};
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = &int8Features;
vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
if (!int8Features.shaderInt8
|| !storage8Features.storageBuffer8BitAccess)
{
throw std::runtime_error("shaderInt8 not supported!\n");
}
VkPhysicalDevice8BitStorageFeatures storage8Enable{};
storage8Enable.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
storage8Enable.storageBuffer8BitAccess = VK_TRUE;
VkPhysicalDeviceShaderFloat16Int8Features int8Enable{};
int8Enable.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
int8Enable.shaderInt8 = VK_TRUE;
int8Enable.pNext = &storage8Enable;
vkDeviceCreateInfo.pNext = &int8Enable;
enabledExtensionNameList.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
vkDeviceCreateInfo.ppEnabledExtensionNames =
enabledExtensionNameList.data();
vkDeviceCreateInfo.enabledExtensionCount =
(uint32_t)enabledExtensionNameList.size();
vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice); vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);
}
else
{
vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);
}
for (uint32_t qfIdx = 0; for (uint32_t qfIdx = 0;
qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size(); qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size();
@@ -1071,7 +1121,8 @@ VulkanComputePipeline::VulkanComputePipeline(
VulkanComputePipeline::VulkanComputePipeline( VulkanComputePipeline::VulkanComputePipeline(
const VulkanDevice &device, const VulkanPipelineLayout &pipelineLayout, const VulkanDevice &device, const VulkanPipelineLayout &pipelineLayout,
const VulkanShaderModule &shaderModule, const std::string &entryFuncName) const VulkanShaderModule &shaderModule, const std::string &entryFuncName,
const VkSpecializationInfo *spec)
: VulkanPipeline(device) : VulkanPipeline(device)
{ {
VkPipelineShaderStageCreateInfo vkPipelineShaderStageCreateInfo = {}; VkPipelineShaderStageCreateInfo vkPipelineShaderStageCreateInfo = {};
@@ -1084,6 +1135,8 @@ VulkanComputePipeline::VulkanComputePipeline(
vkPipelineShaderStageCreateInfo.pName = entryFuncName.c_str(); vkPipelineShaderStageCreateInfo.pName = entryFuncName.c_str();
vkPipelineShaderStageCreateInfo.pSpecializationInfo = NULL; vkPipelineShaderStageCreateInfo.pSpecializationInfo = NULL;
if (spec) vkPipelineShaderStageCreateInfo.pSpecializationInfo = spec;
VkComputePipelineCreateInfo vkComputePipelineCreateInfo = {}; VkComputePipelineCreateInfo vkComputePipelineCreateInfo = {};
vkComputePipelineCreateInfo.sType = vkComputePipelineCreateInfo.sType =
VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;

View File

@@ -148,7 +148,8 @@ public:
VulkanDevice( VulkanDevice(
const VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(), const VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(),
const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap = const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap =
getDefaultVulkanQueueFamilyToQueueCountMap()); getDefaultVulkanQueueFamilyToQueueCountMap(),
bool useShaderInt8 = false);
virtual ~VulkanDevice(); virtual ~VulkanDevice();
const VulkanPhysicalDevice &getPhysicalDevice() const; const VulkanPhysicalDevice &getPhysicalDevice() const;
VulkanQueue & VulkanQueue &
@@ -296,7 +297,8 @@ public:
VulkanComputePipeline(const VulkanDevice &device, VulkanComputePipeline(const VulkanDevice &device,
const VulkanPipelineLayout &pipelineLayout, const VulkanPipelineLayout &pipelineLayout,
const VulkanShaderModule &shaderModule, const VulkanShaderModule &shaderModule,
const std::string &entryFuncName = "main"); const std::string &entryFuncName = "main",
const VkSpecializationInfo *spec = nullptr);
virtual ~VulkanComputePipeline(); virtual ~VulkanComputePipeline();
VulkanPipelineBindPoint getPipelineBindPoint() const; VulkanPipelineBindPoint getPipelineBindPoint() const;
}; };

View File

@@ -16,6 +16,7 @@
#include "testBase.h" #include "testBase.h"
#include "harness/testHarness.h" #include "harness/testHarness.h"
#include "harness/parseParameters.h" #include "harness/parseParameters.h"
#include "harness/stringHelpers.h"
#include <array> #include <array>
#include <memory> #include <memory>
@@ -1009,3 +1010,113 @@ REGISTER_TEST(get_program_build_info)
return 0; return 0;
} }
cl_int test_kernel_name_len(cl_context context, cl_device_id device,
const cl_uint length)
{
cl_int error = CL_SUCCESS;
std::string buf = { "abcdefghijklmnopqrstuvwxyz" };
std::string name;
name.reserve(length);
for (cl_uint i = 0; i < length; ++i) name += buf[i % buf.size()];
const char *sample_name_size_test_kernel = R"(
__kernel void %s(int src, __global int *dst)
{
dst[0]=src;
}
)";
std::string program_source =
str_sprintf(std::string(sample_name_size_test_kernel), name.c_str());
const char *ptr = program_source.c_str();
{
clProgramWrapper program;
clKernelWrapper kernel;
error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
name.c_str());
if (error != CL_SUCCESS)
{
log_error("ERROR: Unable to create program with length of "
"kernel name "
"%d : %s! (%s from %s:%d)\n",
length, name.c_str(), IGetErrorString(error), __FILE__,
__LINE__);
return TEST_FAIL;
}
// query kernel name
size_t kernel_name_size = 0;
error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 0, nullptr,
&kernel_name_size);
test_error(error, "clGetKernelInfo (size) failed");
std::vector<char> kernel_name(kernel_name_size);
error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME,
kernel_name_size, kernel_name.data(), nullptr);
test_error(error, "clGetKernelInfo (name) failed");
if (name != std::string(kernel_name.data()))
{
log_error("Kernel name mismatch! expected=%s got=%s\n",
name.c_str(), kernel_name.data());
return TEST_FAIL;
}
}
if (gCompilationMode == kOnline)
{
clProgramWrapper programObj =
clCreateProgramWithSource(context, 1, &ptr, nullptr, &error);
test_error(error, "clCreateProgramWithSource failed (compile)");
error = clCompileProgram(programObj, 0, nullptr, nullptr, 0, nullptr,
nullptr, nullptr, nullptr);
if (error != CL_SUCCESS)
{
log_error("ERROR: Unable to compile program with length of "
"kernel name "
"%d : %s! (%s from %s:%d)\n",
length, name.c_str(), IGetErrorString(error), __FILE__,
__LINE__);
return TEST_FAIL;
}
clProgramWrapper linkedProgram =
clLinkProgram(context, 0, nullptr, nullptr, 1, &programObj, nullptr,
nullptr, &error);
if (error != CL_SUCCESS)
{
log_error("ERROR: Unable to link program with length of "
"kernel name "
"%d : %s! (%s from %s:%d)\n",
length, name.c_str(), IGetErrorString(error), __FILE__,
__LINE__);
return TEST_FAIL;
}
clKernelWrapper kernel =
clCreateKernel(linkedProgram, name.c_str(), &error);
test_error(error, "clCreateKernel after link failed");
}
return TEST_PASS;
}
REGISTER_TEST(kernel_name_size)
{
for (cl_uint len = 32; len <= 2048; len *= 2)
{
cl_int status = test_kernel_name_len(context, device, len);
if (status == TEST_FAIL)
{
log_error("ERROR: test_kernel_name_len failed with length %d\n",
len);
return TEST_FAIL;
}
}
return TEST_PASS;
}

View File

@@ -14,7 +14,11 @@
// limitations under the License. // limitations under the License.
// //
#include "testBase.h" #include "testBase.h"
#include "harness/kernelHelpers.h"
#include "harness/os_helpers.h" #include "harness/os_helpers.h"
#include "harness/testHarness.h"
#include <array>
const char *preprocessor_test_kernel[] = { const char *preprocessor_test_kernel[] = {
"__kernel void sample_test(__global int *dst)\n" "__kernel void sample_test(__global int *dst)\n"
@@ -42,26 +46,22 @@ const char *include_test_kernel[] = {
"\n" "\n"
"}\n" }; "}\n" };
const char *options_test_kernel[] = { const char *options_test_kernel[] = { "__kernel void sample_test() {}\n" };
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" size_t tid = get_global_id(0);\n"
" dst[tid] = (int)src[tid];\n"
"}\n"
};
const char *optimization_options[] = { std::array optimization_options{
"-cl-single-precision-constant", std::pair{ "-cl-single-precision-constant", Version(1, 0) },
"-cl-denorms-are-zero", std::pair{ "-cl-denorms-are-zero", Version(1, 0) },
"-cl-opt-disable", std::pair{ "-cl-opt-disable", Version(1, 0) },
"-cl-mad-enable", std::pair{ "-cl-mad-enable", Version(1, 0) },
"-cl-no-signed-zeros", std::pair{ "-cl-no-signed-zeros", Version(1, 0) },
"-cl-unsafe-math-optimizations", std::pair{ "-cl-unsafe-math-optimizations", Version(1, 0) },
"-cl-finite-math-only", std::pair{ "-cl-finite-math-only", Version(1, 0) },
"-cl-fast-relaxed-math", std::pair{ "-cl-fast-relaxed-math", Version(1, 0) },
"-w", std::pair{ "-w", Version(1, 0) },
"-Werror", std::pair{ "-Werror", Version(1, 0) },
}; std::pair{ "-cl-uniform-work-group-size", Version(2, 0) },
std::pair{ "-cl-no-subgroup-ifp", Version(2, 1) },
};
cl_int get_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_int *outValue ) cl_int get_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_int *outValue )
{ {
@@ -93,31 +93,44 @@ REGISTER_TEST(options_build_optimizations)
int error; int error;
cl_build_status status; cl_build_status status;
for(size_t i = 0; i < sizeof(optimization_options) / (sizeof(char*)); i++) { Version version = get_device_cl_version(device);
clProgramWrapper program; for (const auto &optimization_option : optimization_options)
error = create_single_kernel_helper_create_program(context, &program, 1, options_test_kernel, optimization_options[i]);
if( program == NULL || error != CL_SUCCESS )
{ {
log_error( "ERROR: Unable to create reference program!\n" ); if (version < optimization_option.second)
{
continue;
}
auto build_options = std::string("-cl-std=CL")
+ get_max_OpenCL_C_for_context(context).to_string() + " "
+ optimization_option.first;
const char *option = build_options.c_str();
clProgramWrapper program;
error = create_single_kernel_helper_create_program(
context, &program, 1, options_test_kernel, option);
if (program == NULL || error != CL_SUCCESS)
{
log_error("ERROR: Unable to create reference program!\n");
return -1; return -1;
} }
/* Build with the macro defined */ /* Build with the macro defined */
log_info("Testing optimization option '%s'\n", optimization_options[i]); log_info("Testing optimization option '%s'\n", option);
error = clBuildProgram(program, 1, &device, optimization_options[i], error = clBuildProgram(program, 1, &device, option, NULL, NULL);
NULL, NULL); test_error(error, "Test program did not properly build");
test_error( error, "Test program did not properly build" );
error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS,
sizeof(status), &status, NULL); sizeof(status), &status, NULL);
test_error( error, "Unable to get program build status" ); test_error(error, "Unable to get program build status");
if( (int)status != CL_BUILD_SUCCESS ) if ((int)status != CL_BUILD_SUCCESS)
{ {
log_info("Building with optimization option '%s' failed to compile!\n", optimization_options[i]); log_info(
print_error( error, "Failed to build with optimization defined") "Building with optimization option '%s' failed to compile!\n",
return -1; option);
print_error(error,
"Failed to build with optimization defined") return -1;
} }
} }
return 0; return 0;
@@ -415,3 +428,53 @@ REGISTER_TEST(options_denorm_cache)
return 0; return 0;
} }
REGISTER_TEST(options_uniform_work_group_size)
{
if (get_device_cl_version(device) < Version(2, 0))
{
return TEST_SKIPPED_ITSELF;
}
std::string build_options = "-cl-std=CL"
+ get_max_OpenCL_C_for_context(context).to_string()
+ " -cl-uniform-work-group-size";
const char *options = build_options.c_str();
clProgramWrapper program;
int error = create_single_kernel_helper_create_program(
context, &program, 1, options_test_kernel, options);
if (program == NULL || error != CL_SUCCESS)
{
log_error("Error: Unable to create reference program!\n");
return TEST_FAIL;
}
error = clBuildProgram(program, 1, &device, options, NULL, NULL);
test_error(error, "Test program did not properly build");
clKernelWrapper kernel = clCreateKernel(program, "sample_test", &error);
test_error(error, "Unable to create kernel");
size_t global_work_size = 4;
size_t uniform_local_work_size = 2;
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size,
&uniform_local_work_size, 0, NULL, NULL);
test_error(error,
"Unable to enqueue NDRange kernel with uniform work group size");
error = clFinish(queue);
test_error(error, "Unable to finish");
size_t non_uniform_local_work_size = 3;
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size,
&non_uniform_local_work_size, 0, NULL, NULL);
if (error != CL_INVALID_WORK_GROUP_SIZE)
{
log_error(
"Error: expected error 'CL_INVALID_WORK_GROUP_SIZE' (got '%s') "
"trying to enqueue kernel compiled with '%s' with non-uniform work "
"group size\n",
IGetErrorString(error), options);
return TEST_FAIL;
}
return TEST_PASS;
}

View File

@@ -3983,6 +3983,9 @@ REGISTER_TEST(multiple_build_program)
error = clEnqueueNDRangeKernel(queue, kernel0, 1, NULL, &num_threads, error = clEnqueueNDRangeKernel(queue, kernel0, 1, NULL, &num_threads,
NULL, 0, NULL, NULL); NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed"); test_error(error, "clEnqueueNDRangeKernel failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
} }
{ {
@@ -4001,10 +4004,10 @@ REGISTER_TEST(multiple_build_program)
error = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &num_threads, error = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &num_threads,
NULL, 0, NULL, NULL); NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed"); test_error(error, "clEnqueueNDRangeKernel failed");
}
error = clFinish(queue); error = clFinish(queue);
test_error(error, "clFinish failed"); test_error(error, "clFinish failed");
}
std::vector<cl_int> test_values(num_threads, 0); std::vector<cl_int> test_values(num_threads, 0);
error = clEnqueueReadBuffer(queue, out_stream_0, true, 0, error = clEnqueueReadBuffer(queue, out_stream_0, true, 0,

View File

@@ -56,6 +56,7 @@ const char *known_extensions[] = {
"cl_khr_integer_dot_product", "cl_khr_integer_dot_product",
"cl_khr_subgroup_rotate", "cl_khr_subgroup_rotate",
"cl_khr_kernel_clock", "cl_khr_kernel_clock",
"cl_khr_icd_unloadable",
// API-only extensions after this point. If you add above here, modify // API-only extensions after this point. If you add above here, modify
// first_API_extension below. // first_API_extension below.
"cl_khr_icd", "cl_khr_icd",

View File

@@ -191,7 +191,7 @@ double sse_mul_sd(double x, double y)
} }
#endif #endif
#ifdef __PPC__ #if defined(__PPC__) || defined(__riscv)
float ppc_mul(float a, float b) float ppc_mul(float a, float b)
{ {
float p; float p;
@@ -630,9 +630,11 @@ test_status InitCL( cl_device_id device )
// turn that off // turn that off
f3[i] = sse_mul(q, q2); f3[i] = sse_mul(q, q2);
f4[i] = sse_mul(-q, q2); f4[i] = sse_mul(-q, q2);
#elif defined(__PPC__) #elif (defined(__PPC__) || defined(__riscv))
// None of the current generation PPC processors support HW // RISC-V CPUs with default 'f' fp32 extension do not support
// FTZ, emulate it in sw. // enabling/disabling FTZ mode, subnormals are always handled
// without FTZ. None of the current generation PPC processors
// support HW FTZ, emulate it in sw.
f3[i] = ppc_mul(q, q2); f3[i] = ppc_mul(q, q2);
f4[i] = ppc_mul(-q, q2); f4[i] = ppc_mul(-q, q2);
#else #else
@@ -721,9 +723,10 @@ test_status InitCL( cl_device_id device )
skipTest[j][i] = (bufSkip[i] || skipTest[j][i] = (bufSkip[i] ||
(gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW))))); (gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)))));
#if defined(__PPC__) #if defined(__PPC__) || defined(__riscv)
// Since the current Power processors don't emulate flush to zero in HW, // Since the current Power processors don't emulate flush to
// it must be emulated in SW instead. // zero in HW, it must be emulated in SW instead. (same for
// RISC-V CPUs with 'f' extension)
if (gForceFTZ) if (gForceFTZ)
{ {
if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f)) if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f))
@@ -760,7 +763,6 @@ test_status InitCL( cl_device_id device )
} }
} }
double *f = (double*) buf1; double *f = (double*) buf1;
double *f2 = (double*) buf2; double *f2 = (double*) buf2;
double *f3 = (double*) buf3_double; double *f3 = (double*) buf3_double;

View File

@@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
uint64_t GetTime(void); uint64_t GetTime(void);
void WriteInputBufferComplete(void *); void WriteInputBufferComplete(void *);
void *FlushToZero(void);
void UnFlushToZero(void *);
} }
struct CalcRefValsBase struct CalcRefValsBase

View File

@@ -340,7 +340,7 @@ static const char* enqueue_block_capture_event_profiling_info_before_execution[]
set_user_event_status(user_evt, CL_COMPLETE); set_user_event_status(user_evt, CL_COMPLETE);
void (^checkBlock) (void) = ^{ check_res(tid, &value, res); }; void (^checkBlock) (void) = ^{ check_res(tid, value, res); };
enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock); enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

View File

@@ -129,8 +129,8 @@ static const char *helper_ndrange_2d_glo[] = {
"}" NL, "}" NL,
"" NL, "" NL,
"kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, " "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"val, __global uint* ofs_arr)" NL, "atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL, "{" NL,
" size_t tid = get_global_id(0);" NL, " size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -156,8 +156,8 @@ static const char *helper_ndrange_2d_loc[] = {
"}" NL, "}" NL,
"" NL, "" NL,
"kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, " "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"val, __global uint* ofs_arr)" NL, "atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL, "{" NL,
" size_t tid = get_global_id(0);" NL, " size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -193,8 +193,8 @@ static const char *helper_ndrange_2d_ofs[] = {
"}" NL, "}" NL,
"" NL, "" NL,
"kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, " "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"val, __global uint* ofs_arr)" NL, "atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL, "{" NL,
" size_t tid = get_global_id(0);" NL, " size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -233,8 +233,8 @@ static const char *helper_ndrange_3d_glo[] = {
"}" NL, "}" NL,
"" NL, "" NL,
"kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, " "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"val, __global uint* ofs_arr)" NL, "atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL, "{" NL,
" size_t tid = get_global_id(0);" NL, " size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -266,8 +266,8 @@ static const char *helper_ndrange_3d_loc[] = {
"}" NL, "}" NL,
"" NL, "" NL,
"kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, " "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"val, __global uint* ofs_arr)" NL, "atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL, "{" NL,
" size_t tid = get_global_id(0);" NL, " size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -306,8 +306,8 @@ static const char *helper_ndrange_3d_ofs[] = {
"}" NL, "}" NL,
"" NL, "" NL,
"kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, " "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"val, __global uint* ofs_arr)" NL, "atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL, "{" NL,
" size_t tid = get_global_id(0);" NL, " size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,

View File

@@ -14,7 +14,10 @@
// //
#include "harness/typeWrappers.h" #include "harness/typeWrappers.h"
#include "harness/extensionHelpers.h"
#include <cinttypes> #include <cinttypes>
#include <vector>
#include <string>
#define BUF_SIZE 1024 #define BUF_SIZE 1024
#define BUF_SIZE_STR "1024" #define BUF_SIZE_STR "1024"
@@ -310,8 +313,8 @@ private:
// A basic buffer used to pass the other buffer's address. // A basic buffer used to pass the other buffer's address.
error = clEnqueueWriteBuffer(queue, buffer_in_long, error = clEnqueueWriteBuffer(queue, buffer_in_long,
CL_TRUE, // block CL_TRUE, // block
0, sizeof(cl_long), &DeviceAddrFromAPI, 0, sizeof(DeviceAddrFromAPI),
0, NULL, NULL); &DeviceAddrFromAPI, 0, NULL, NULL);
test_error_fail(error, test_error_fail(error,
"clEnqueueWriteBuffer of dev_addr_buffer failed\n"); "clEnqueueWriteBuffer of dev_addr_buffer failed\n");
@@ -322,9 +325,9 @@ private:
&buffer_out_int); &buffer_out_int);
test_error_fail(error, "clSetKernelArg 1 failed\n"); test_error_fail(error, "clSetKernelArg 1 failed\n");
error = clSetKernelExecInfo(ind_access_kernel, error = clSetKernelExecInfo(
CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT, ind_access_kernel, CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT,
sizeof(void *), &DeviceAddrFromAPI); sizeof(DeviceAddrFromAPI), &DeviceAddrFromAPI);
test_error_fail(error, test_error_fail(error,
"Setting indirect access for " "Setting indirect access for "
"device ptrs failed!\n"); "device ptrs failed!\n");
@@ -421,6 +424,8 @@ private:
REGISTER_TEST(private_address) REGISTER_TEST(private_address)
{ {
REQUIRE_EXTENSION("cl_ext_buffer_device_address");
BufferDeviceAddressTest test_fixture = BufferDeviceAddressTest( BufferDeviceAddressTest test_fixture = BufferDeviceAddressTest(
device, context, queue, CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT); device, context, queue, CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT);
@@ -435,3 +440,180 @@ REGISTER_TEST(private_address)
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST(private_address_multi_device)
{
    // Verifies cl_ext_buffer_device_address in a multi-device context:
    // a buffer created with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT reports one
    // device address per device, and a kernel launched on each device can
    // write through that device's address.
    REQUIRE_EXTENSION("cl_ext_buffer_device_address");

    cl_platform_id platform = 0;
    cl_int error = CL_SUCCESS;
    cl_uint numDevices = 0;

    error = clGetPlatformIDs(1, &platform, NULL);
    test_error_ret(error, "Unable to get platform\n", TEST_FAIL);

    /* Get some devices */
    error =
        clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
    test_error_ret(error, "Unable to get multiple devices\n", TEST_FAIL);

    // This test is only meaningful with at least two devices.
    if (numDevices < 2)
    {
        log_info(
            "WARNING: multi device test unable to get multiple devices via "
            "CL_DEVICE_TYPE_ALL (got %u devices). Skipping test...\n",
            numDevices);
        return TEST_SKIPPED_ITSELF;
    }

    std::vector<cl_device_id> devices(numDevices);
    error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, numDevices,
                           devices.data(), &numDevices);
    test_error_ret(error, "Unable to get multiple devices\n", TEST_FAIL);

    GET_PFN(devices[0], clSetKernelArgDevicePointerEXT);

    cl_context_properties properties[] = { CL_CONTEXT_PLATFORM,
                                           (cl_context_properties)platform,
                                           0 };
    clContextWrapper ctx = clCreateContext(
        properties, numDevices, devices.data(), nullptr, nullptr, &error);
    test_error_ret(error, "Unable to create context\n", TEST_FAIL);

    /* Create buffer with a device-private address */
    cl_mem_properties props[] = { CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT, CL_TRUE,
                                  0 };
    clMemWrapper buffer = clCreateBufferWithProperties(
        ctx, props, CL_MEM_READ_WRITE, 16, nullptr, &error);
    // Fix: the original never checked the buffer-creation status before use.
    test_error_ret(error, "Unable to create buffer\n", TEST_FAIL);

    // One device address is returned per device in the context.
    std::vector<cl_mem_device_address_ext> addresses(numDevices);
    error =
        clGetMemObjectInfo(buffer, CL_MEM_DEVICE_ADDRESS_EXT,
                           sizeof(cl_mem_device_address_ext) * addresses.size(),
                           addresses.data(), nullptr);
    test_error_ret(error, "clGetMemObjectInfo failed\n", TEST_FAIL);

    // One in-order queue per device so each device executes independently.
    std::vector<clCommandQueueWrapper> queues(numDevices);
    for (cl_uint i = 0; i < numDevices; ++i)
    {
        queues[i] = clCreateCommandQueue(ctx, devices[i], 0, &error);
        test_error_ret(error, "Unable to create command queue\n", TEST_FAIL);
    }

    static std::string source = R"(
        void kernel test_device_address(
            global ulong* ptr,
            ulong value)
        {
            *ptr = value;
        })";

    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *source_ptr = source.data();
    error = create_single_kernel_helper(ctx, &program, &kernel, 1, &source_ptr,
                                        "test_device_address");
    test_error(error, "Unable to create test kernel");

    for (cl_uint i = 0; i < numDevices; ++i)
    {
        cl_command_queue queue = queues[i];

        // Setting a NULL device pointer must be accepted.
        error = clSetKernelArgDevicePointerEXT(kernel, 0, 0);
        test_error_fail(error,
                        "clSetKernelArgDevicePointerEXT failed with NULL "
                        "pointer argument\n");

        // Point the kernel at the second cl_ulong of the buffer (offset 8)
        // using this device's private address.
        error = clSetKernelArgDevicePointerEXT(kernel, 0, addresses[i] + 8);
        test_error_ret(error, "Unable to set kernel arg\n", TEST_FAIL);

        // Use a per-device pattern so a stale write from another device
        // would be detected.
        const cl_ulong pattern = 0xAABBCCDDEEFF0011 + i;
        error = clSetKernelArg(kernel, 1, sizeof(pattern), &pattern);
        test_error_ret(error, "Unable to set kernel arg\n", TEST_FAIL);

        size_t gwo = 0;
        size_t gws = 1;
        size_t lws = 1;
        error = clEnqueueNDRangeKernel(queue, kernel, 1, &gwo, &gws, &lws, 0,
                                       nullptr, nullptr);
        test_error_ret(error, "Unable to enqueue kernel\n", TEST_FAIL);

        error = clFinish(queue);
        test_error_ret(error, "clFinish failed\n", TEST_FAIL);

        // Read both cl_ulongs back; only the second (offset 8) was written.
        std::vector<cl_ulong> results(2, 0);
        error = clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0,
                                    results.size() * sizeof(cl_ulong),
                                    results.data(), 0, nullptr, nullptr);
        test_error_ret(error, "clEnqueueReadBuffer failed\n", TEST_FAIL);

        if (results[1] != pattern)
            test_fail("Test value doesn't match expected value\n");
    }
    return TEST_PASS;
}
REGISTER_TEST(negative_private_address)
{
    // Negative tests for cl_ext_buffer_device_address:
    //  - CL_MEM_DEVICE_ADDRESS_EXT query on a buffer created without the
    //    device-private-address property must fail with CL_INVALID_OPERATION.
    //  - clSetKernelArgDevicePointerEXT must reject invalid kernels and
    //    argument indices that are not global pointers.
    REQUIRE_EXTENSION("cl_ext_buffer_device_address");

    cl_int error = CL_SUCCESS;
    GET_PFN(device, clSetKernelArgDevicePointerEXT);

    /* Create buffer WITHOUT CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT */
    clMemWrapper buffer = clCreateBufferWithProperties(
        context, nullptr, CL_MEM_READ_WRITE, 16, nullptr, &error);
    // Fix: buffer creation itself must succeed; the original never checked
    // this before issuing the (expected-to-fail) query below.
    test_error_ret(error, "Unable to create buffer\n", TEST_FAIL);

    cl_mem_device_address_ext address;
    error = clGetMemObjectInfo(buffer, CL_MEM_DEVICE_ADDRESS_EXT,
                               sizeof(cl_mem_device_address_ext), &address,
                               nullptr);
    test_failure_error_ret(
        error, CL_INVALID_OPERATION,
        "clGetMemObjectInfo should return CL_INVALID_OPERATION when: "
        "\"the buffer was not created with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT\"",
        TEST_FAIL);

    // Kernel with args: 0 = global ptr, 1 = local ptr, 2 = plain ulong.
    static std::string source = R"(
        void kernel test_device_address(
            global ulong* ptr,
            local ulong* ptr2,
            ulong value)
        {
            *ptr = value;
        })";

    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *source_ptr = source.data();
    error = create_single_kernel_helper(context, &program, &kernel, 1,
                                        &source_ptr, "test_device_address");
    test_error(error, "Unable to create test kernel");

    // Invalid kernel handle.
    error = clSetKernelArgDevicePointerEXT(nullptr, 0, 0);
    test_failure_error_ret(
        error, CL_INVALID_KERNEL,
        "clSetKernelArgDevicePointerEXT should return CL_INVALID_KERNEL when: "
        "\"kernel is not a valid kernel object\"",
        TEST_FAIL);

    // Arg 1 is a local pointer, not a global pointer.
    error = clSetKernelArgDevicePointerEXT(kernel, 1, 0x15465);
    test_failure_error_ret(
        error, CL_INVALID_ARG_INDEX,
        "clSetKernelArgDevicePointerEXT should return "
        "CL_INVALID_ARG_INDEX when: "
        "\"the expected kernel argument is not a pointer to global memory\"",
        TEST_FAIL);

    // Arg 2 is a plain ulong, not a pointer.
    error = clSetKernelArgDevicePointerEXT(kernel, 2, 0x15465);
    test_failure_error_ret(error, CL_INVALID_ARG_INDEX,
                           "clSetKernelArgDevicePointerEXT should return "
                           "CL_INVALID_ARG_INDEX when: "
                           "\"the expected kernel argument is not a pointer\"",
                           TEST_FAIL);

    // Arg 3 does not exist.
    error = clSetKernelArgDevicePointerEXT(kernel, 3, 0x15465);
    test_failure_error_ret(error, CL_INVALID_ARG_INDEX,
                           "clSetKernelArgDevicePointerEXT should return "
                           "CL_INVALID_ARG_INDEX when: "
                           "\"arg_index is not a valid argument index\"",
                           TEST_FAIL);
    return TEST_PASS;
}

View File

@@ -42,8 +42,8 @@ int test_cxx_for_opencl(cl_device_id device, cl_context context,
execute(*p, x); execute(*p, x);
})"; })";
error = create_single_kernel_helper_with_build_options( error = create_single_kernel_helper(context, &program, &kernel1, 1,
context, &program, &kernel1, 1, &kernel_sstr, "k1", "-cl-std=CLC++"); &kernel_sstr, "k1", "-cl-std=CLC++");
test_error(error, "Failed to create k1 kernel"); test_error(error, "Failed to create k1 kernel");
kernel2 = clCreateKernel(program, "k2", &error); kernel2 = clCreateKernel(program, "k2", &error);

View File

@@ -18,6 +18,7 @@ set(${MODULE_NAME}_SOURCES
command_buffer_test_event_info.cpp command_buffer_test_event_info.cpp
command_buffer_finalize.cpp command_buffer_finalize.cpp
command_buffer_pipelined_enqueue.cpp command_buffer_pipelined_enqueue.cpp
command_buffer_kernel_attributes.cpp
negative_command_buffer_finalize.cpp negative_command_buffer_finalize.cpp
negative_command_buffer_svm_mem.cpp negative_command_buffer_svm_mem.cpp
negative_command_buffer_copy_image.cpp negative_command_buffer_copy_image.cpp

View File

@@ -16,6 +16,7 @@ set(${MODULE_NAME}_SOURCES
mutable_command_work_groups.cpp mutable_command_work_groups.cpp
mutable_command_work_dim.cpp mutable_command_work_dim.cpp
mutable_command_update_state.cpp mutable_command_update_state.cpp
mutable_command_defer_arguments.cpp
../basic_command_buffer.cpp ../basic_command_buffer.cpp
) )

View File

@@ -76,12 +76,10 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
bool Skip() override bool Skip() override
{ {
bool extension_avaliable = bool extension_available = is_extension_available(
is_extension_available(device, device, "cl_khr_command_buffer_mutable_dispatch");
"cl_khr_command_buffer_mutable_dispatch")
== true;
if (extension_avaliable) if (extension_available)
{ {
Version device_version = get_device_cl_version(device); Version device_version = get_device_cl_version(device);
if ((device_version >= Version(3, 0)) if ((device_version >= Version(3, 0))
@@ -91,12 +89,12 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
cl_version extension_version = get_extension_version( cl_version extension_version = get_extension_version(
device, "cl_khr_command_buffer_mutable_dispatch"); device, "cl_khr_command_buffer_mutable_dispatch");
if (extension_version != CL_MAKE_VERSION(0, 9, 4)) if (extension_version != CL_MAKE_VERSION(0, 9, 5))
{ {
log_info("cl_khr_command_buffer_mutable_dispatch version " log_info("cl_khr_command_buffer_mutable_dispatch version "
"0.9.4 is " "0.9.5 is "
"required to run the test, skipping.\n "); "required to run the test, skipping.\n ");
extension_avaliable = false; extension_available = false;
} }
} }
} }
@@ -109,7 +107,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
sizeof(mutable_capabilities), &mutable_capabilities, nullptr) sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities != 0; && mutable_capabilities != 0;
return !mutable_support || !extension_avaliable return !mutable_support || !extension_available
|| BasicCommandBufferTest::Skip(); || BasicCommandBufferTest::Skip();
} }

View File

@@ -0,0 +1,219 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testHarness.h"
#include "mutable_command_basic.h"
namespace {
////////////////////////////////////////////////////////////////////////////////
// Mutable dispatch test which handles the case where all the arguments of a
// kernel aren't set when a kernel is initially added to a mutable
// command-buffer, but deferred until an update is made to the command to set
// them before command-buffer enqueue.
// Test fixture: a kernel is recorded into a mutable command-buffer while one
// of its arguments is still unset; the missing argument is supplied later via
// clUpdateMutableCommandsKHR, and the command-buffer state transitions
// (FINALIZED -> EXECUTABLE) are checked along the way.
struct MutableDispatchDeferArguments : public BasicMutableCommandBufferTest
{
    MutableDispatchDeferArguments(cl_device_id device, cl_context context,
                                  cl_command_queue queue)
        : BasicMutableCommandBufferTest(device, context, queue)
    {}

    // Skip unless the device reports CL_MUTABLE_DISPATCH_ARGUMENTS_KHR in
    // its mutable-dispatch capabilities (argument updates are required here).
    bool Skip() override
    {
        if (BasicMutableCommandBufferTest::Skip()) return true;
        cl_mutable_dispatch_fields_khr mutable_capabilities;
        // mutable_support is true only if the query succeeds (returns 0) AND
        // the arguments-update capability bit is set.
        bool mutable_support =
            !clGetDeviceInfo(
                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
            && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
        // require mutable arguments capability
        return !mutable_support;
    }

    // Builds the copy kernel used by the test (src -> dst element copy).
    cl_int SetUpKernel() override
    {
        // Create kernel
        const char *defer_args_kernel =
            R"(
        __kernel void defer_args_test(__constant int *src, __global int *dst)
        {
            size_t tid = get_global_id(0);
            dst[tid] = src[tid];
        })";
        cl_int error =
            create_single_kernel_helper(context, &program, &kernel, 1,
                                        &defer_args_kernel, "defer_args_test");
        test_error(error, "Creating kernel failed");
        return CL_SUCCESS;
    }

    // Creates src/dst buffers but deliberately sets only the src argument;
    // dst (index 1) stays unset until the mutable update in Run().
    cl_int SetUpKernelArgs() override
    {
        // Create and initialize buffers
        MTdataHolder d(gRandomSeed);
        src_data.resize(num_elements);
        for (size_t i = 0; i < num_elements; i++)
            src_data[i] = (cl_int)genrand_int32(d);

        cl_int error = CL_SUCCESS;
        in_mem = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                                num_elements * sizeof(cl_int), src_data.data(),
                                &error);
        test_error(error, "Creating src buffer");

        out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                 num_elements * sizeof(cl_int), NULL, &error);
        test_error(error, "Creating initial dst buffer failed");

        // Only set a single kernel argument, leaving argument at index 1 unset
        error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem);
        test_error(error, "Unable to set src kernel arguments");
        return CL_SUCCESS;
    }

    // Returns true iff CL_COMMAND_BUFFER_STATE_KHR matches `expected`.
    bool verify_state(cl_command_buffer_state_khr expected)
    {
        // Initialize with an all-bits-set sentinel so a silently unwritten
        // query cannot accidentally match a valid state value.
        cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0);
        cl_int error = clGetCommandBufferInfoKHR(
            command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state), &state,
            nullptr);
        if (error != CL_SUCCESS)
        {
            log_error("clGetCommandBufferInfoKHR failed: %d", error);
            return false;
        }
        if (state != expected)
        {
            log_error("Unexpected result of CL_COMMAND_BUFFER_STATE_KHR query. "
                      "Expected %u, but was %u\n",
                      expected, state);
            return false;
        }
        return true;
    }

    // Reads `buffer` back and compares element-wise against src_data.
    bool verify_result(const cl_mem &buffer)
    {
        std::vector<cl_int> data(num_elements);
        cl_int error =
            clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(),
                                data.data(), 0, nullptr, nullptr);
        test_error(error, "clEnqueueReadBuffer failed");
        for (size_t i = 0; i < num_elements; i++)
        {
            if (data[i] != src_data[i])
            {
                log_error("Modified verification failed at index %zu: Got %d, "
                          "wanted %d\n",
                          i, data[i], src_data[i]);
                return false;
            }
        }
        return true;
    }

    cl_int Run() override
    {
        // Create command while the kernel still has the second argument unset.
        // Passing 'CL_MUTABLE_DISPATCH_ARGUMENTS_KHR' as a property means this
        // shouldn't be an error.
        cl_command_properties_khr props[] = {
            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
        };
        cl_int error = clCommandNDRangeKernelKHR(
            command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
            nullptr, 0, nullptr, nullptr, &command);
        test_error(error, "clCommandNDRangeKernelKHR failed");

        // Finalizing the command buffer shouldn't be an error, but result in
        // the command-buffer entering the CL_COMMAND_BUFFER_STATE_FINALIZED
        // state.
        error = clFinalizeCommandBufferKHR(command_buffer);
        test_error(error, "clFinalizeCommandBufferKHR failed");
        if (!verify_state(CL_COMMAND_BUFFER_STATE_FINALIZED_KHR))
        {
            return TEST_FAIL;
        }

        // Check that trying to enqueue the command-buffer in this state is an
        // error, as it needs to be in the executable state.
        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
                                          nullptr, nullptr);
        test_failure_error_ret(error, CL_INVALID_OPERATION,
                               "clEnqueueCommandBufferKHR should return "
                               "CL_INVALID_OPERATION",
                               TEST_FAIL);

        // Update the kernel command to set the missing argument.
        cl_mutable_dispatch_arg_khr arg{ 1, sizeof(out_mem), &out_mem };
        cl_mutable_dispatch_config_khr dispatch_config{
            command,
            1 /* num_args */,
            0 /* num_svm_arg */,
            0 /* num_exec_infos */,
            0 /* work_dim - 0 means no change to dimensions */,
            &arg /* arg_list */,
            nullptr /* arg_svm_list - nullptr means no change*/,
            nullptr /* exec_info_list */,
            nullptr /* global_work_offset */,
            nullptr /* global_work_size */,
            nullptr /* local_work_size */
        };
        cl_uint num_configs = 1;
        cl_command_buffer_update_type_khr config_types[1] = {
            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
        };
        const void *configs[1] = { &dispatch_config };
        error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
                                           config_types, configs);
        test_error(error, "clUpdateMutableCommandsKHR failed");

        // Now that all the arguments have been set, verify the
        // command-buffer has entered the executable state.
        if (!verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR))
        {
            return TEST_FAIL;
        }

        // Execute command-buffer and verify results are expected
        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
                                          nullptr, nullptr);
        test_error(error, "clEnqueueCommandBufferKHR failed");
        if (!verify_result(out_mem)) return TEST_FAIL;

        return TEST_PASS;
    }

    // Handle of the recorded ND-range command, target of the mutable update.
    cl_mutable_command_khr command;
    // Host-side reference copy of the source buffer contents.
    std::vector<cl_int> src_data;
};
} // anonymous namespace
// Harness entry point: runs the deferred-argument mutable dispatch fixture.
REGISTER_TEST(mutable_dispatch_defer_arguments)
{
    return MakeAndRunTest<MutableDispatchDeferArguments>(device, context, queue,
                                                         num_elements);
}

View File

@@ -20,6 +20,7 @@
#include <CL/cl.h> #include <CL/cl.h>
#include <CL/cl_ext.h> #include <CL/cl_ext.h>
#include <algorithm>
#include <vector> #include <vector>
namespace { namespace {
@@ -126,7 +127,20 @@ struct MutableCommandFullDispatch : InfoMutableCommandBufferTest
&workgroupinfo_size, NULL); &workgroupinfo_size, NULL);
test_error(error, "clGetKernelWorkGroupInfo failed"); test_error(error, "clGetKernelWorkGroupInfo failed");
group_size = std::min(num_elements, workgroupinfo_size); cl_uint max_work_dimension = 0;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
sizeof(max_work_dimension), &max_work_dimension,
NULL);
test_error(error, "clGetDeviceInfo failed");
std::vector<size_t> max_work_item_sizes(max_work_dimension, 0);
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(size_t) * max_work_item_sizes.size(),
max_work_item_sizes.data(), NULL);
test_error(error, "clGetDeviceInfo failed");
group_size = std::min(
{ num_elements, workgroupinfo_size, max_work_item_sizes[0] });
const size_t size_to_allocate_src = group_size * sizeof(cl_int); const size_t size_to_allocate_src = group_size * sizeof(cl_int);
// create and initialize source buffer // create and initialize source buffer

View File

@@ -0,0 +1,235 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "basic_command_buffer.h"
namespace {
////////////////////////////////////////////////////////////////////////////////
// Tests for cl_khr_command_buffer while enqueueing a kernel with a
// reqd_work_group_size with a NULL local_work_size.
// Tests that a command-buffer ND-range with a NULL local_work_size honors a
// kernel's reqd_work_group_size attribute, and (when available) that
// clGetKernelSuggestedLocalWorkSizeKHR suggests the attributed size.
struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest
{
    // Kernel body appended to each reqd_work_group_size attribute string;
    // work-item (0,0,0) records the local size actually used.
    inline static const std::string body_str = R"(
        __kernel void wg_size(__global int* dst)
        {
          if (get_global_id(0) == 0 &&
              get_global_id(1) == 0 &&
              get_global_id(2) == 0) {
            dst[0] = get_local_size(0);
            dst[1] = get_local_size(1);
            dst[2] = get_local_size(2);
          }
        }
        )";

    KernelAttributesReqGroupSizeTest(cl_device_id device, cl_context context,
                                     cl_command_queue queue)
        : BasicCommandBufferTest(device, context, queue), dst(nullptr),
          clGetKernelSuggestedLocalWorkSizeKHR(nullptr),
          device_max_work_group_size(0)
    {}

    // Queries device limits, creates the result buffer, and resolves the
    // optional cl_khr_suggested_local_work_size entry point.
    cl_int SetUp(int elements) override
    {
        cl_int error = BasicCommandBufferTest::SetUp(elements);
        test_error(error, "BasicCommandBufferTest::SetUp failed");

        if (is_extension_available(device, "cl_khr_suggested_local_work_size"))
        {
            cl_platform_id platform = nullptr;
            error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
                                    sizeof(platform), &platform, NULL);
            test_error(error, "clGetDeviceInfo for platform failed");
            clGetKernelSuggestedLocalWorkSizeKHR =
                (clGetKernelSuggestedLocalWorkSizeKHR_fn)
                    clGetExtensionFunctionAddressForPlatform(
                        platform, "clGetKernelSuggestedLocalWorkSizeKHR");
            test_assert_error(clGetKernelSuggestedLocalWorkSizeKHR != nullptr,
                              "Couldn't get function pointer for "
                              "clGetKernelSuggestedLocalWorkSizeKHR");
        }

        // Holds the three get_local_size() values recorded by the kernel.
        dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
                             nullptr, &error);
        test_error(error, "clCreateBuffer failed");

        cl_uint device_max_dim = 0;
        error =
            clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                            sizeof(device_max_dim), &device_max_dim, nullptr);
        test_error(
            error,
            "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
        test_assert_error(
            device_max_dim >= 3,
            "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS must be at least 3!");

        device_max_work_item_sizes.resize(device_max_dim);
        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                sizeof(size_t) * device_max_dim,
                                device_max_work_item_sizes.data(), nullptr);
        // Fix: the original overwrote this error with the next query without
        // ever checking it.
        test_error(error,
                   "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed");

        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                                sizeof(device_max_work_group_size),
                                &device_max_work_group_size, nullptr);
        test_error(error,
                   "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed");
        return CL_SUCCESS;
    }

    cl_int Run() override
    {
        cl_int error = CL_SUCCESS;

        // A reqd_work_group_size triple plus the smallest work_dim for which
        // the attribute is exercised (trailing dimensions of 1 allow smaller
        // dims too).
        struct KernelAttribInfo
        {
            cl_int wgs[3];
            cl_uint min_dim;
        };
        std::vector<KernelAttribInfo> attribs = { { { 2, 1, 1 }, 1 },
                                                  { { 2, 3, 1 }, 2 },
                                                  { { 2, 3, 4 }, 3 } };
        for (auto& attrib : attribs)
        {
            // Build the kernel source with the attribute prepended.
            const std::string attrib_str =
                "__attribute__((reqd_work_group_size("
                + std::to_string(attrib.wgs[0]) + ","
                + std::to_string(attrib.wgs[1]) + ","
                + std::to_string(attrib.wgs[2]) + ")))";
            const std::string source_str = attrib_str + body_str;
            const char* source = source_str.c_str();
            clProgramWrapper program;
            clKernelWrapper kernel;
            error = create_single_kernel_helper(context, &program, &kernel, 1,
                                                &source, "wg_size");
            test_error(error, "Unable to create test kernel");

            error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
            test_error(error, "clSetKernelArg failed");

            for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++)
            {
                // Skip configurations the device cannot launch.
                const size_t test_work_group_size =
                    attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2];
                if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0]
                    || (size_t)attrib.wgs[1] > device_max_work_item_sizes[1]
                    || (size_t)attrib.wgs[2] > device_max_work_item_sizes[2]
                    || test_work_group_size > device_max_work_group_size)
                {
                    log_info(
                        "Skipping test for work_dim = %u: required work group "
                        "size (%i, %i, %i) (total %zu) exceeds device max "
                        "work group size (%zu, %zu, %zu) (total %zu)\n",
                        work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
                        test_work_group_size, device_max_work_item_sizes[0],
                        device_max_work_item_sizes[1],
                        device_max_work_item_sizes[2],
                        device_max_work_group_size);
                    continue;
                }

                // Zero the result slots before the kernel runs.
                const cl_int zero = 0;
                error = clCommandFillBufferKHR(
                    command_buffer, nullptr, nullptr, dst, &zero, sizeof(zero),
                    0, sizeof(attrib.wgs), 0, nullptr, nullptr, nullptr);
                test_error(error, "clCommandFillBufferKHR failed");

                // Global sizes are multiples of every attribute triple, so a
                // NULL local size must resolve to the reqd_work_group_size.
                const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
                error = clCommandNDRangeKernelKHR(
                    command_buffer, nullptr, nullptr, kernel, work_dim, nullptr,
                    global_work_size, nullptr, 0, nullptr, nullptr, nullptr);
                test_error(error, "clCommandNDRangeKernelKHR failed");

                error = clFinalizeCommandBufferKHR(command_buffer);
                test_error(error, "clFinalizeCommandBufferKHR failed");

                error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
                                                  nullptr, nullptr);
                test_error(error, "clEnqueueCommandBufferKHR failed");

                cl_int results[3] = { -1, -1, -1 };
                error =
                    clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(results),
                                        results, 0, nullptr, nullptr);
                test_error(error, "clEnqueueReadBuffer failed");

                // Verify the result
                if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1]
                    || results[2] != attrib.wgs[2])
                {
                    log_error(
                        "Executed local size mismatch with work_dim = %u: "
                        "Expected (%d,%d,%d) got (%d,%d,%d)\n",
                        work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
                        results[0], results[1], results[2]);
                    return TEST_FAIL;
                }

                // Cross-check the suggested local size when the extension is
                // supported.
                if (clGetKernelSuggestedLocalWorkSizeKHR != nullptr)
                {
                    size_t suggested[3] = { 1, 1, 1 };
                    error = clGetKernelSuggestedLocalWorkSizeKHR(
                        queue, kernel, work_dim, nullptr, global_work_size,
                        suggested);
                    test_error(error,
                               "clGetKernelSuggestedLocalWorkSizeKHR failed");
                    if (suggested[0] != (size_t)attrib.wgs[0]
                        || suggested[1] != (size_t)attrib.wgs[1]
                        || suggested[2] != (size_t)attrib.wgs[2])
                    {
                        log_error(
                            "Suggested local size mismatch with work_dim = "
                            "%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n",
                            work_dim, attrib.wgs[0], attrib.wgs[1],
                            attrib.wgs[2], suggested[0], suggested[1],
                            suggested[2]);
                        return TEST_FAIL;
                    }
                }

                // Create a fresh command-buffer for the next iteration; the
                // wrapper member releases the finalized one on reassignment.
                command_buffer =
                    clCreateCommandBufferKHR(1, &queue, nullptr, &error);
                test_error(error, "clCreateCommandBufferKHR failed");
            }
        }
        return CL_SUCCESS;
    }

    // Result buffer holding the recorded local sizes.
    clMemWrapper dst;
    // Optional entry point; nullptr when the extension is unavailable.
    clGetKernelSuggestedLocalWorkSizeKHR_fn
        clGetKernelSuggestedLocalWorkSizeKHR;
    // Device limits cached in SetUp().
    size_t device_max_work_group_size;
    std::vector<size_t> device_max_work_item_sizes;
};
} // anonymous namespace
// Harness entry point: runs the reqd_work_group_size command-buffer fixture.
REGISTER_TEST(command_null_required_work_group_size)
{
    return MakeAndRunTest<KernelAttributesReqGroupSizeTest>(
        device, context, queue, num_elements);
}

View File

@@ -23,9 +23,32 @@
#include <android/hardware_buffer.h> #include <android/hardware_buffer.h>
#include "debug_ahb.h" #include "debug_ahb.h"
static bool isAHBUsageReadable(const AHardwareBuffer_UsageFlags usage) static bool isAHBUsageReadableHost(AHardwareBuffer_UsageFlags usage)
{ {
return (AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE & usage) != 0; return (AHARDWAREBUFFER_USAGE_CPU_READ_MASK & usage) != 0;
}
static bool isAHBUsageWritableHost(AHardwareBuffer_UsageFlags usage)
{
return (AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK & usage) != 0;
}
static bool isAHBUsageReadableDevice(const AHardwareBuffer_UsageFlags usage)
{
return ((AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE
| AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER
| AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA)
& usage)
!= 0;
}
static cl_ulong getMaxAllocSize(cl_device_id device)
{
cl_ulong ret;
cl_int err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
sizeof(cl_ulong), &ret, nullptr);
test_error(err, "clGetDeviceInfo failed");
return ret;
} }
struct ahb_format_table struct ahb_format_table
@@ -50,6 +73,32 @@ ahb_image_size_table test_sizes[] = {
{ 64, 64 }, { 128, 128 }, { 256, 256 }, { 512, 512 } { 64, 64 }, { 128, 128 }, { 256, 256 }, { 512, 512 }
}; };
uint32_t test_buffer_sizes[] = { 2, 8, 32, 128, 512, 2048, 16384, 65536 };
ahb_usage_table test_buffer_usages[] = {
{ static_cast<AHardwareBuffer_UsageFlags>(
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
| AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY
| AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) },
{ static_cast<AHardwareBuffer_UsageFlags>(
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
| AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY
| AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) },
{ static_cast<AHardwareBuffer_UsageFlags>(
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
| AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
| AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) },
{ static_cast<AHardwareBuffer_UsageFlags>(
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
| AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
| AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) },
{ static_cast<AHardwareBuffer_UsageFlags>(
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
| AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY) },
{ static_cast<AHardwareBuffer_UsageFlags>(
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN) }
};
ahb_usage_table test_usages[] = { ahb_usage_table test_usages[] = {
{ static_cast<AHardwareBuffer_UsageFlags>( { static_cast<AHardwareBuffer_UsageFlags>(
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
@@ -96,6 +145,23 @@ static const char *diff_images_kernel_source = {
})" })"
}; };
static const char *lifetime_kernel_source = {
R"(
__kernel void increment_buffer(global uchar* buffer)
{
int tid = get_global_id(0);
buffer[tid] ++;
}
__kernel void set_image_color(write_only image2d_t ahb_image, float4 set_color)
{
int tidX = get_global_id(0);
int tidY = get_global_id(1);
write_imagef(ahb_image, (int2)( tidX, tidY ), set_color);
})"
};
// Checks that the inferred image format is correct // Checks that the inferred image format is correct
REGISTER_TEST(images) REGISTER_TEST(images)
{ {
@@ -217,8 +283,9 @@ REGISTER_TEST(images_read)
aHardwareBufferDesc.format = format.aHardwareBufferFormat; aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages) for (auto usage : test_usages)
{ {
// Filter out usage flags that are not readable on device if (!(isAHBUsageReadableHost(usage.usageFlags)
if (!isAHBUsageReadable(usage.usageFlags)) && isAHBUsageWritableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{ {
continue; continue;
} }
@@ -512,8 +579,9 @@ REGISTER_TEST(enqueue_read_image)
aHardwareBufferDesc.format = format.aHardwareBufferFormat; aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages) for (auto usage : test_usages)
{ {
// Filter out usage flags that are not readable on device if (!(isAHBUsageReadableHost(usage.usageFlags)
if (!isAHBUsageReadable(usage.usageFlags)) && isAHBUsageWritableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{ {
continue; continue;
} }
@@ -690,8 +758,9 @@ REGISTER_TEST(enqueue_copy_image)
aHardwareBufferDesc.format = format.aHardwareBufferFormat; aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages) for (auto usage : test_usages)
{ {
// Filter out usage flags that are not readable on device if (!(isAHBUsageReadableHost(usage.usageFlags)
if (!isAHBUsageReadable(usage.usageFlags)) && isAHBUsageWritableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{ {
continue; continue;
} }
@@ -993,8 +1062,9 @@ REGISTER_TEST(enqueue_copy_image_to_buffer)
aHardwareBufferDesc.format = format.aHardwareBufferFormat; aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages) for (auto usage : test_usages)
{ {
// Filter out usage flags that are not readable on device if (!(isAHBUsageReadableHost(usage.usageFlags)
if (!isAHBUsageReadable(usage.usageFlags)) && isAHBUsageWritableHost(usage.usageFlags)
&& isAHBUsageReadableDevice(usage.usageFlags)))
{ {
continue; continue;
} }
@@ -1181,8 +1251,8 @@ REGISTER_TEST(enqueue_copy_buffer_to_image)
aHardwareBufferDesc.format = format.aHardwareBufferFormat; aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages) for (auto usage : test_usages)
{ {
// Filter out usage flags that are not readable on device if (!(isAHBUsageReadableHost(usage.usageFlags)
if (!isAHBUsageReadable(usage.usageFlags)) && isAHBUsageReadableDevice(usage.usageFlags)))
{ {
continue; continue;
} }
@@ -1222,8 +1292,8 @@ REGISTER_TEST(enqueue_copy_buffer_to_image)
imageInfo.type = format.clMemObjectType; imageInfo.type = format.clMemObjectType;
imageInfo.width = resolution.width; imageInfo.width = resolution.width;
imageInfo.height = resolution.height; imageInfo.height = resolution.height;
imageInfo.rowPitch = resolution.width * resolution.height // data is tightly packed in buffer
* pixelSize; // data is tightly packed in buffer imageInfo.rowPitch = resolution.width * pixelSize;
test_assert_error(imageInfo.rowPitch test_assert_error(imageInfo.rowPitch
>= pixelSize * imageInfo.width, >= pixelSize * imageInfo.width,
"Row pitch is smaller than width"); "Row pitch is smaller than width");
@@ -1376,8 +1446,8 @@ REGISTER_TEST(enqueue_write_image)
aHardwareBufferDesc.format = format.aHardwareBufferFormat; aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages) for (auto usage : test_usages)
{ {
// Filter out usage flags that are not readable on device if (!(isAHBUsageReadableHost(usage.usageFlags)
if (!isAHBUsageReadable(usage.usageFlags)) && isAHBUsageReadableDevice(usage.usageFlags)))
{ {
continue; continue;
} }
@@ -1429,8 +1499,8 @@ REGISTER_TEST(enqueue_write_image)
imageInfo.type = format.clMemObjectType; imageInfo.type = format.clMemObjectType;
imageInfo.width = resolution.width; imageInfo.width = resolution.width;
imageInfo.height = resolution.height; imageInfo.height = resolution.height;
imageInfo.rowPitch = resolution.width * resolution.height // Data is tightly packed
* pixelSize; // Data is tightly packed imageInfo.rowPitch = resolution.width * pixelSize;
test_assert_error(imageInfo.rowPitch test_assert_error(imageInfo.rowPitch
>= pixelSize * imageInfo.width, >= pixelSize * imageInfo.width,
"Row pitch is smaller than width"); "Row pitch is smaller than width");
@@ -1568,8 +1638,8 @@ REGISTER_TEST(enqueue_fill_image)
aHardwareBufferDesc.format = format.aHardwareBufferFormat; aHardwareBufferDesc.format = format.aHardwareBufferFormat;
for (auto usage : test_usages) for (auto usage : test_usages)
{ {
// Filter out usage flags that are not readable on device if (!(isAHBUsageReadableHost(usage.usageFlags)
if (!isAHBUsageReadable(usage.usageFlags)) && isAHBUsageReadableDevice(usage.usageFlags)))
{ {
continue; continue;
} }
@@ -1620,8 +1690,8 @@ REGISTER_TEST(enqueue_fill_image)
imageInfo.type = format.clMemObjectType; imageInfo.type = format.clMemObjectType;
imageInfo.width = resolution.width; imageInfo.width = resolution.width;
imageInfo.height = resolution.height; imageInfo.height = resolution.height;
imageInfo.rowPitch = resolution.width * resolution.height imageInfo.rowPitch = resolution.width * pixelSize;
* pixelSize; // Data is tightly packed // Data is tightly packed
test_assert_error(imageInfo.rowPitch test_assert_error(imageInfo.rowPitch
>= pixelSize * imageInfo.width, >= pixelSize * imageInfo.width,
"Row pitch is smaller than width"); "Row pitch is smaller than width");
@@ -1857,3 +1927,412 @@ REGISTER_TEST(blob)
return TEST_PASS; return TEST_PASS;
} }
/*
* For cl buffer and cl image
* Create a AHB
* Create a mem object from the AHB
* Release the AHB
* Read and write using the mem object
* Verify reads and writes
*/
REGISTER_TEST(lifetime_buffer)
{
    // Lifetime test: create an AHardwareBuffer, import it as a CL buffer,
    // release the host-side AHB reference, then verify the CL buffer is
    // still readable and writable (the import must keep the memory alive).
    REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer");

    cl_int err;
    constexpr cl_uint buffer_size = 4096;
    std::vector<uint8_t> host_buffer(buffer_size, 1);
    clMemWrapper imported_buffer;
    {
        // Check if AHB descriptors for buffers and images are supported
        AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
        aHardwareBufferDesc.width = buffer_size;
        aHardwareBufferDesc.height = 1;
        aHardwareBufferDesc.layers = 1;
        aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
        aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
            | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
        if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
        {
            log_unsupported_ahb_format(aHardwareBufferDesc);
            return TEST_SKIPPED_ITSELF;
        }

        log_info("Testing buffer lifetime\n");
        AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);

        const cl_mem_properties props[] = {
            CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
            aHardwareBuffer.get_props(),
            0,
        };
        imported_buffer = clCreateBufferWithProperties(
            context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
        test_error(err, "Failed to create CL buffer from AHardwareBuffer");

        // Fill AHB buffer through the CPU mapping before releasing it.
        void *data_ptr = nullptr;
        int ahb_result = AHardwareBuffer_lock(
            aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, nullptr,
            &data_ptr);
        if (ahb_result != 0)
        {
            log_error("AHardwareBuffer_lock failed with code %d\n", ahb_result);
            return TEST_FAIL;
        }
        memcpy(data_ptr, host_buffer.data(), buffer_size);
        ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
        if (ahb_result != 0)
        {
            log_error("AHardwareBuffer_unlock failed with code %d\n",
                      ahb_result);
            return TEST_FAIL;
        }
    } // Release test scope reference to AHB

    // Verify buffer read by comparing to host buffer
    std::vector<uint8_t> read_buffer(buffer_size);
    err = clEnqueueReadBuffer(queue, imported_buffer, true, 0, buffer_size,
                              read_buffer.data(), 0, nullptr, nullptr);
    test_error(err, "failed clEnqueueReadBuffer");
    for (size_t i = 0; i < buffer_size; i++)
    {
        if (read_buffer[i] != host_buffer[i])
        {
            log_error("At position %zu expected value: %u but got value: %u\n",
                      i, host_buffer[i], read_buffer[i]);
            return TEST_FAIL;
        }
    }

    // Attempt buffer write via a kernel that increments every byte.
    clProgramWrapper program;
    clKernelWrapper kernel;
    err = create_single_kernel_helper(context, &program, &kernel, 1,
                                      &lifetime_kernel_source,
                                      "increment_buffer");
    test_error(err, "kernel creation failed");
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_buffer);
    test_error(err, "clSetKernelArg failed");
    size_t gws[1] = { buffer_size };
    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, gws, nullptr, 0,
                                 nullptr, nullptr);
    test_error(err, "Failed clEnqueueNDRangeKernel");

    // Verify write; the blocking read also flushes the in-order queue.
    err = clEnqueueReadBuffer(queue, imported_buffer, true, 0, buffer_size,
                              read_buffer.data(), 0, nullptr, nullptr);
    test_error(err, "failed clEnqueueReadBuffer");
    for (size_t i = 0; i < buffer_size; i++)
    {
        if (read_buffer[i]
            != host_buffer[i] + 1) // Kernel incremented each index by 1
        {
            // Fix: report the actual expected value (host value + 1); the
            // original logged the pre-increment value as "expected".
            log_error("At position %zu expected value: %u but got value: %u\n",
                      i, host_buffer[i] + 1, read_buffer[i]);
            return TEST_FAIL;
        }
    }
    return TEST_PASS;
}
// Verifies that an imported AHardwareBuffer-backed cl image keeps the
// underlying memory alive after the test's own AHB reference is released:
// the image must remain readable (map) and writable (kernel) through OpenCL.
REGISTER_TEST(lifetime_image)
{
    REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer");

    cl_int err;
    const AHardwareBuffer_Format aHardwareBufferFormat =
        AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
    const cl_image_format clImageFormat = { CL_RGBA, CL_UNORM_INT8 };
    const size_t pixel_size = get_pixel_size(&clImageFormat);

    for (auto resolution : test_sizes)
    {
        const size_t image_size =
            resolution.width * resolution.height * pixel_size;
        // Payload bytes per row, excluding any pitch padding reported by
        // clEnqueueMapImage.
        const size_t row_bytes = resolution.width * pixel_size;
        std::vector<uint8_t> host_image_data(image_size, 1);
        clMemWrapper imported_image;
        {
            // Check if AHB descriptors for buffers and images are supported
            AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
            aHardwareBufferDesc.width = resolution.width;
            aHardwareBufferDesc.height = resolution.height;
            aHardwareBufferDesc.layers = 1;
            aHardwareBufferDesc.format = aHardwareBufferFormat;
            aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
                | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
            if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
            {
                log_unsupported_ahb_format(aHardwareBufferDesc);
                continue;
            }

            log_info("Testing image lifetime\n");

            AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);

            const cl_mem_properties props_image[] = {
                CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
                aHardwareBuffer.get_props(),
                0,
            };

            // Format and descriptor are derived from the external handle.
            imported_image = clCreateImageWithProperties(
                context, props_image, CL_MEM_READ_WRITE, nullptr, nullptr,
                nullptr, &err);
            test_error(err, "Failed to create CL image from AHardwareBuffer");

            // Fill the AHB with the reference pattern.
            void *data_ptr = nullptr;
            int ahb_result = AHardwareBuffer_lock(
                aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
                nullptr, &data_ptr);
            if (ahb_result != 0)
            {
                log_error("AHardwareBuffer_lock failed with code %d\n",
                          ahb_result);
                return TEST_FAIL;
            }
            memcpy(data_ptr, host_image_data.data(), image_size);
            ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
            if (ahb_result != 0)
            {
                log_error("AHardwareBuffer_unlock failed with code %d\n",
                          ahb_result);
                return TEST_FAIL;
            }
        } // Release test scope reference to AHB

        // Verify image read using host data
        size_t origin[3] = { 0, 0, 0 };
        size_t region[3] = { resolution.width, resolution.height, 1 };
        size_t row_pitch;
        uint8_t *mapped_image_ptr = static_cast<uint8_t *>(clEnqueueMapImage(
            queue, imported_image, true, CL_MAP_READ, origin, region,
            &row_pitch, nullptr, 0, nullptr, nullptr, &err));
        test_error(err, "clEnqueueMapImage failed");
        for (size_t row = 0; row < resolution.height; ++row)
        {
            // Compare every byte of the row: each pixel is pixel_size bytes,
            // so the row payload is row_bytes, not just `width` bytes.
            for (size_t col = 0; col < row_bytes; ++col)
            {
                size_t mapped_image_idx = row * row_pitch + col;
                size_t host_image_idx = row * row_bytes + col;
                if (mapped_image_ptr[mapped_image_idx]
                    != host_image_data[host_image_idx])
                {
                    log_error(
                        "At byte position (%zu, %zu) expected value: %u but "
                        "got value: %u\n",
                        row, col, host_image_data[host_image_idx],
                        mapped_image_ptr[mapped_image_idx]);
                    return TEST_FAIL;
                }
            }
        }
        err = clEnqueueUnmapMemObject(queue, imported_image, mapped_image_ptr,
                                      0, nullptr, nullptr);
        test_error(err, "clEnqueueUnmapMemObject failed");
        err = clFinish(queue);
        test_error(err, "clFinish failed");

        // Attempt image write
        clProgramWrapper program;
        clKernelWrapper kernel;
        err = create_single_kernel_helper(context, &program, &kernel, 1,
                                          &lifetime_kernel_source,
                                          "set_image_color");
        test_error(err, "kernel creation failed");
        err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_image);
        test_error(err, "clSetKernelArg failed");
        // 0.5f per channel maps to 128 in UNORM_INT8 for every byte.
        cl_float4 color = { { 0.5f, 0.5f, 0.5f, 0.5f } };
        err = clSetKernelArg(kernel, 1, sizeof(cl_float4), &color);
        test_error(err, "clSetKernelArg failed");
        std::vector<size_t> gws = { resolution.width, resolution.height };
        err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, gws.data(),
                                     nullptr, 0, nullptr, nullptr);
        test_error(err, "Failed clEnqueueNDRangeKernel");
        err = clFinish(queue);
        test_error(err, "clFinish failed");

        // Verify image write
        mapped_image_ptr = static_cast<uint8_t *>(clEnqueueMapImage(
            queue, imported_image, true, CL_MAP_READ, origin, region,
            &row_pitch, nullptr, 0, nullptr, nullptr, &err));
        test_error(err, "clEnqueueMapImage failed");
        for (size_t row = 0; row < resolution.height; ++row)
        {
            // Every byte of every pixel was written to 0.5f -> 128.
            for (size_t col = 0; col < row_bytes; ++col)
            {
                size_t mapped_image_idx = row * row_pitch + col;
                if (128 != mapped_image_ptr[mapped_image_idx])
                {
                    log_error(
                        "At byte position (%zu, %zu) expected value: %u but "
                        "got value: %u\n",
                        row, col, 128, mapped_image_ptr[mapped_image_idx]);
                    return TEST_FAIL;
                }
            }
        }
        err = clEnqueueUnmapMemObject(queue, imported_image, mapped_image_ptr,
                                      0, nullptr, nullptr);
        test_error(err, "clEnqueueUnmapMemObject failed");
        err = clFinish(queue);
        test_error(err, "clFinish failed");
    }
    return TEST_PASS;
}
/* Testing clCreateSubBuffer
* Create AHB
* Write to AHB
* Create CL buffer from AHB
* Create a sub buffer into half of the buffer
* Read & verify sub buffer
*/
REGISTER_TEST(sub_buffer)
{
    // Use the same extension guard as the other AHB tests in this file
    // (lifetime_buffer / lifetime_image) instead of a hand-rolled check.
    REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer");

    cl_int err;
    RandomSeed seed(gRandomSeed);

    AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
    for (auto usage : test_buffer_usages)
    {
        // The test needs host read/write access to seed data and device
        // read access to import the buffer; skip other usage combinations.
        if (!(isAHBUsageReadableHost(usage.usageFlags)
              && isAHBUsageWritableHost(usage.usageFlags)
              && isAHBUsageReadableDevice(usage.usageFlags)))
        {
            continue;
        }
        aHardwareBufferDesc.usage = usage.usageFlags;
        for (uint32_t buffer_size : test_buffer_sizes)
        {
            if (buffer_size > getMaxAllocSize(device))
            {
                continue;
            }
            aHardwareBufferDesc.width = buffer_size;
            aHardwareBufferDesc.height = 1;
            aHardwareBufferDesc.layers = 1;
            if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
            {
                log_unsupported_ahb_format(aHardwareBufferDesc);
                continue;
            }
            AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
            log_info("Testing usage: %s, buffer size: %u\n",
                     ahardwareBufferDecodeUsageFlagsToString(usage.usageFlags)
                         .c_str(),
                     buffer_size);

            // Seed the AHB with random data so sub-buffer reads are
            // meaningfully verifiable.
            void *hardware_buffer_data = nullptr;
            int ahb_result = AHardwareBuffer_lock(
                aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY, -1,
                nullptr, &hardware_buffer_data);
            if (ahb_result != 0)
            {
                log_error("AHardwareBuffer_lock failed with code %d\n",
                          ahb_result);
                return TEST_FAIL;
            }
            std::vector<uint8_t> host_buffer(buffer_size);
            generate_random_data(ExplicitType::kUnsignedChar, buffer_size, seed,
                                 host_buffer.data());
            memcpy(hardware_buffer_data, host_buffer.data(), buffer_size);
            ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
            if (ahb_result != 0)
            {
                log_error("AHardwareBuffer_unlock failed with code %d\n",
                          ahb_result);
                return TEST_FAIL;
            }

            cl_mem_properties props[] = {
                CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
                aHardwareBuffer.get_props(), 0
            };
            clMemWrapper buffer = clCreateBufferWithProperties(
                context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
            test_error(err, "Failed to create CL buffer from AHardwareBuffer");

            // Sub-buffer covering the first half of the imported buffer.
            cl_uint sub_buffer_size = buffer_size / 2;
            cl_buffer_region region = { 0 };
            region.origin = 0;
            region.size = sub_buffer_size;
            clMemWrapper sub_buffer =
                clCreateSubBuffer(buffer, CL_MEM_READ_WRITE,
                                  CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
            test_error(err, "clCreateSubBuffer failed");

            // Read through the sub-buffer and compare against the seeded data.
            std::vector<uint8_t> host_sub_buffer(sub_buffer_size);
            err = clEnqueueReadBuffer(queue, sub_buffer, true, 0,
                                      sub_buffer_size, host_sub_buffer.data(),
                                      0, nullptr, nullptr);
            test_error(err, "clEnqueueReadBuffer failed");
            for (size_t i = 0; i < sub_buffer_size; ++i)
            {
                if (host_buffer[i] != host_sub_buffer[i])
                {
                    log_error(
                        "At position i=%zu expected value %u but got %u\n", i,
                        host_buffer[i], host_sub_buffer[i]);
                    return TEST_FAIL;
                }
            }
        }
    }
    return TEST_PASS;
}

View File

@@ -3,7 +3,6 @@ set(MODULE_NAME CL_KHR_EXTERNAL_SEMAPHORE)
set(${MODULE_NAME}_SOURCES set(${MODULE_NAME}_SOURCES
main.cpp main.cpp
test_external_semaphore.cpp test_external_semaphore.cpp
test_external_semaphore_sync_fd.cpp
) )
set (CLConform_VULKAN_LIBRARIES_DIR "${VULKAN_LIB_DIR}") set (CLConform_VULKAN_LIBRARIES_DIR "${VULKAN_LIB_DIR}")
@@ -18,7 +17,6 @@ include_directories (${CLConform_INCLUDE_DIR})
list(APPEND CLConform_LIBRARIES vulkan_wrapper) list(APPEND CLConform_LIBRARIES vulkan_wrapper)
set(CMAKE_COMPILE_WARNING_AS_ERROR OFF) set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
set(CMAKE_CXX_FLAGS "-fpermissive")
include_directories("../../common/vulkan_wrapper") include_directories("../../common/vulkan_wrapper")

View File

@@ -197,8 +197,11 @@ REGISTER_TEST_VERSION(external_semaphores_queries, Version(1, 2))
return TEST_PASS; return TEST_PASS;
} }
REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2)) cl_int doTestImportExport(cl_device_id device, cl_context contexts[2],
cl_command_queue queues[2])
{ {
cl_int err = CL_SUCCESS;
REQUIRE_EXTENSION("cl_khr_external_semaphore"); REQUIRE_EXTENSION("cl_khr_external_semaphore");
GET_PFN(device, clEnqueueSignalSemaphoresKHR); GET_PFN(device, clEnqueueSignalSemaphoresKHR);
@@ -210,7 +213,6 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
std::vector<cl_external_semaphore_handle_type_khr> import_handle_types; std::vector<cl_external_semaphore_handle_type_khr> import_handle_types;
std::vector<cl_external_semaphore_handle_type_khr> export_handle_types; std::vector<cl_external_semaphore_handle_type_khr> export_handle_types;
cl_int err = CL_SUCCESS;
err = get_device_semaphore_handle_types( err = get_device_semaphore_handle_types(
device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
import_handle_types); import_handle_types);
@@ -237,17 +239,9 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
export_handle_types.begin(), export_handle_types.end(), export_handle_types.begin(), export_handle_types.end(),
std::back_inserter(import_export_handle_types)); std::back_inserter(import_export_handle_types));
cl_context context2 = cl_context& context2 = contexts[1];
clCreateContext(NULL, 1, &device, notify_callback, NULL, &err); cl_command_queue& queue1 = queues[0];
test_error(err, "Failed to create context2"); cl_command_queue& queue2 = queues[1];
clCommandQueueWrapper queue1 =
clCreateCommandQueue(context, device, 0, &err);
test_error(err, "Could not create command queue");
clCommandQueueWrapper queue2 =
clCreateCommandQueue(context2, device, 0, &err);
test_error(err, "Could not create command queue");
if (import_export_handle_types.empty()) if (import_export_handle_types.empty())
{ {
@@ -270,7 +264,7 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
// Signal semaphore on context1 // Signal semaphore on context1
cl_semaphore_khr exportable_semaphore = cl_semaphore_khr exportable_semaphore =
clCreateSemaphoreWithPropertiesKHR(context, export_props, &err); clCreateSemaphoreWithPropertiesKHR(contexts[0], export_props, &err);
test_error(err, "Failed to create exportable semaphore"); test_error(err, "Failed to create exportable semaphore");
err = clEnqueueSignalSemaphoresKHR(queue1, 1, &exportable_semaphore, err = clEnqueueSignalSemaphoresKHR(queue1, 1, &exportable_semaphore,
@@ -313,12 +307,77 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
test_error(err, "Failed to release semaphore"); test_error(err, "Failed to release semaphore");
} }
err = clReleaseContext(context2);
test_error(err, "Failed to release context2");
return TEST_PASS; return TEST_PASS;
} }
// Cross-context variant: run the import/export scenario with the harness
// context plus a freshly created second context on the same device.
REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
{
    cl_int err = CL_SUCCESS;

    // Second context on the same device; the wrapper releases it on exit.
    clContextWrapper secondary_context =
        clCreateContext(NULL, 1, &device, notify_callback, NULL, &err);
    test_error(err, "Failed to create context2");

    cl_context contexts[2] = { context, secondary_context };

    // One in-order queue per context.
    clCommandQueueWrapper primary_queue =
        clCreateCommandQueue(contexts[0], device, 0, &err);
    test_error(err, "Could not create command queue");
    clCommandQueueWrapper secondary_queue =
        clCreateCommandQueue(contexts[1], device, 0, &err);
    test_error(err, "Could not create command queue");

    cl_command_queue queues[2] = { primary_queue, secondary_queue };
    return doTestImportExport(device, contexts, queues);
}
// Runs the import/export scenario twice within a single context: once on an
// out-of-order queue (when the device supports one) and once on the in-order
// harness queue. Skipped sub-runs do not fail the aggregate result.
REGISTER_TEST_VERSION(external_semaphores_import_export, Version(1, 2))
{
    cl_int err = CL_SUCCESS;
    cl_int total_status = TEST_PASS;

    // Fold a sub-run result into the aggregate: skips are tolerated,
    // anything else that is not a pass fails the whole test.
    auto record_result = [&total_status](cl_int status) {
        if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
        {
            total_status = TEST_FAIL;
        }
    };

    // test external semaphores with out-of-order queue
    {
        cl_command_queue_properties device_props = 0;
        err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
                              sizeof(device_props), &device_props, NULL);
        test_error(err,
                   "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");

        if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0)
        {
            // Create ooo queue
            clCommandQueueWrapper ooo_queue = clCreateCommandQueue(
                context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
            test_error(err, "Could not create command queue");

            cl_command_queue queues[2] = { ooo_queue, ooo_queue };
            cl_context contexts[2] = { context, context };
            record_result(doTestImportExport(device, contexts, queues));
        }
    }

    // test external semaphore sync fd with in-order harness queue
    {
        cl_command_queue queues[2] = { queue, queue };
        cl_context contexts[2] = { context, context };
        record_result(doTestImportExport(device, contexts, queues));
    }

    return total_status;
}
// Confirm that a signal followed by a wait will complete successfully // Confirm that a signal followed by a wait will complete successfully
REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2)) REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
{ {
@@ -366,9 +425,14 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
// Signal semaphore // Signal semaphore
clEventWrapper signal_event; clEventWrapper signal_event;
err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), if (vkExternalSemaphoreHandleType
nullptr, 0, nullptr, &signal_event); != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
err = clEnqueueSignalSemaphoresKHR(
queue, 1, &sema_ext.getCLSemaphore(), nullptr, 0, nullptr,
&signal_event);
test_error(err, "Could not signal semaphore"); test_error(err, "Could not signal semaphore");
}
// Wait semaphore // Wait semaphore
clEventWrapper wait_event; clEventWrapper wait_event;
@@ -381,7 +445,11 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
test_error(err, "Could not finish queue"); test_error(err, "Could not finish queue");
// Ensure all events are completed // Ensure all events are completed
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
test_assert_event_complete(signal_event); test_assert_event_complete(signal_event);
}
test_assert_event_complete(wait_event); test_assert_event_complete(wait_event);
} }
@@ -405,6 +473,7 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
// Obtain pointers to semaphore's API // Obtain pointers to semaphore's API
GET_PFN(device, clEnqueueSignalSemaphoresKHR); GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR); GET_PFN(device, clEnqueueWaitSemaphoresKHR);
GET_PFN(device, clReImportSemaphoreSyncFdKHR);
std::vector<VulkanExternalSemaphoreHandleType> std::vector<VulkanExternalSemaphoreHandleType>
vkExternalSemaphoreHandleTypeList = vkExternalSemaphoreHandleTypeList =
@@ -448,11 +517,15 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
err = clEnqueueTask(queue, kernel, 0, nullptr, &task_events[0]); err = clEnqueueTask(queue, kernel, 0, nullptr, &task_events[0]);
test_error(err, "Unable to enqueue task_1"); test_error(err, "Unable to enqueue task_1");
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
// Signal semaphore (dependency on task_1) // Signal semaphore (dependency on task_1)
err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), err = clEnqueueSignalSemaphoresKHR(
nullptr, 1, &task_events[0], queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1,
&signal_events[0]); &task_events[0], &signal_events[0]);
test_error(err, "Could not signal semaphore"); test_error(err, "Could not signal semaphore");
}
// In a loop // In a loop
size_t loop; size_t loop;
@@ -473,12 +546,22 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
err = clWaitForEvents(1, &wait_events[loop - 1]); err = clWaitForEvents(1, &wait_events[loop - 1]);
test_error(err, "Unable to wait for wait semaphore to complete"); test_error(err, "Unable to wait for wait semaphore to complete");
if (vkExternalSemaphoreHandleType
== VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
err = clReImportSemaphoreSyncFdKHR(sema_ext.getCLSemaphore(),
nullptr, -1);
test_error(err, "Could not reimport semaphore sync fd");
}
else
{
// Signal semaphore (dependency on task_loop) // Signal semaphore (dependency on task_loop)
err = clEnqueueSignalSemaphoresKHR( err = clEnqueueSignalSemaphoresKHR(
queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1, queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1,
&task_events[loop], &signal_events[loop]); &task_events[loop], &signal_events[loop]);
test_error(err, "Could not signal semaphore"); test_error(err, "Could not signal semaphore");
} }
}
// Wait semaphore // Wait semaphore
err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
@@ -494,7 +577,11 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
for (loop = 0; loop < loop_count; ++loop) for (loop = 0; loop < loop_count; ++loop)
{ {
test_assert_event_complete(wait_events[loop]); test_assert_event_complete(wait_events[loop]);
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
test_assert_event_complete(signal_events[loop]); test_assert_event_complete(signal_events[loop]);
}
test_assert_event_complete(task_events[loop]); test_assert_event_complete(task_events[loop]);
} }
} }
@@ -536,6 +623,19 @@ static int external_semaphore_cross_queue_helper(cl_device_id device,
for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType : for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
vkExternalSemaphoreHandleTypeList) vkExternalSemaphoreHandleTypeList)
{ {
if (vkExternalSemaphoreHandleType
== VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
std::stringstream log_message;
log_message
<< "Skipping semaphore type: \""
<< vkExternalSemaphoreHandleType
<< "\"; it cannot be signaled from OpenCL when imported."
<< std::endl;
log_info("%s", log_message.str().c_str());
continue;
}
log_info_semaphore_type(vkExternalSemaphoreHandleType); log_info_semaphore_type(vkExternalSemaphoreHandleType);
VulkanSemaphore vkVk2CLSemaphore(vkDevice, VulkanSemaphore vkVk2CLSemaphore(vkDevice,
vkExternalSemaphoreHandleType); vkExternalSemaphoreHandleType);
@@ -668,10 +768,14 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
// Signal semaphore 1 // Signal semaphore 1
clEventWrapper signal_1_event; clEventWrapper signal_1_event;
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
err = clEnqueueSignalSemaphoresKHR( err = clEnqueueSignalSemaphoresKHR(
queue1, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr, queue1, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr,
&signal_1_event); &signal_1_event);
test_error(err, "Could not signal semaphore"); test_error(err, "Could not signal semaphore");
}
// Wait semaphore 1 // Wait semaphore 1
clEventWrapper wait_1_event; clEventWrapper wait_1_event;
@@ -682,10 +786,14 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
// Signal semaphore 2 // Signal semaphore 2
clEventWrapper signal_2_event; clEventWrapper signal_2_event;
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
err = clEnqueueSignalSemaphoresKHR( err = clEnqueueSignalSemaphoresKHR(
queue2, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr, queue2, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr,
&signal_2_event); &signal_2_event);
test_error(err, "Could not signal semaphore"); test_error(err, "Could not signal semaphore");
}
// Wait semaphore 2 // Wait semaphore 2
clEventWrapper wait_2_event; clEventWrapper wait_2_event;
@@ -702,8 +810,12 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
test_error(err, "Could not finish queue"); test_error(err, "Could not finish queue");
// Ensure all events are completed // Ensure all events are completed
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
test_assert_event_complete(signal_1_event); test_assert_event_complete(signal_1_event);
test_assert_event_complete(signal_2_event); test_assert_event_complete(signal_2_event);
}
test_assert_event_complete(wait_1_event); test_assert_event_complete(wait_1_event);
test_assert_event_complete(wait_2_event); test_assert_event_complete(wait_2_event);
} }
@@ -741,6 +853,19 @@ REGISTER_TEST_VERSION(external_semaphores_multi_signal, Version(1, 2))
for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType : for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
vkExternalSemaphoreHandleTypeList) vkExternalSemaphoreHandleTypeList)
{ {
if (vkExternalSemaphoreHandleType
== VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
std::stringstream log_message;
log_message
<< "Skipping semaphore type: \""
<< vkExternalSemaphoreHandleType
<< "\"; it cannot be signaled from OpenCL when imported."
<< std::endl;
log_info("%s", log_message.str().c_str());
continue;
}
log_info_semaphore_type(vkExternalSemaphoreHandleType); log_info_semaphore_type(vkExternalSemaphoreHandleType);
VulkanSemaphore vkVk2CLSemaphore1(vkDevice, VulkanSemaphore vkVk2CLSemaphore1(vkDevice,
vkExternalSemaphoreHandleType); vkExternalSemaphoreHandleType);
@@ -842,19 +967,23 @@ REGISTER_TEST_VERSION(external_semaphores_multi_wait, Version(1, 2))
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
test_error(err, "Could not create command queue"); test_error(err, "Could not create command queue");
// Signal semaphore 1
clEventWrapper signal_1_event; clEventWrapper signal_1_event;
err = clEventWrapper signal_2_event;
clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(), if (vkExternalSemaphoreHandleType
nullptr, 0, nullptr, &signal_1_event); != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
// Signal semaphore 1
err = clEnqueueSignalSemaphoresKHR(
queue, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr,
&signal_1_event);
test_error(err, "Could not signal semaphore"); test_error(err, "Could not signal semaphore");
// Signal semaphore 2 // Signal semaphore 2
clEventWrapper signal_2_event; err = clEnqueueSignalSemaphoresKHR(
err = queue, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr,
clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_2.getCLSemaphore(), &signal_2_event);
nullptr, 0, nullptr, &signal_2_event);
test_error(err, "Could not signal semaphore"); test_error(err, "Could not signal semaphore");
}
// Wait semaphore 1 and 2 // Wait semaphore 1 and 2
clEventWrapper wait_event; clEventWrapper wait_event;
@@ -869,8 +998,12 @@ REGISTER_TEST_VERSION(external_semaphores_multi_wait, Version(1, 2))
test_error(err, "Could not finish queue"); test_error(err, "Could not finish queue");
// Ensure all events are completed // Ensure all events are completed
if (vkExternalSemaphoreHandleType
!= VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
{
test_assert_event_complete(signal_1_event); test_assert_event_complete(signal_1_event);
test_assert_event_complete(signal_2_event); test_assert_event_complete(signal_2_event);
}
test_assert_event_complete(wait_event); test_assert_event_complete(wait_event);
} }

View File

@@ -1,131 +0,0 @@
//
// Copyright (c) 2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/typeWrappers.h"
#include "harness/extensionHelpers.h"
#include "harness/errorHelpers.h"
// Test it is possible to export a semaphore to a sync fd and import the same
// sync fd to a new semaphore
REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2))
{
    cl_int err = CL_SUCCESS;

    // Both the base external-semaphore extension and the sync_fd handle
    // extension are required; skip (not fail) when either is missing.
    if (!is_extension_available(device, "cl_khr_external_semaphore"))
    {
        log_info(
            "cl_khr_external_semaphore is not supported on this platoform. "
            "Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }
    if (!is_extension_available(device, "cl_khr_external_semaphore_sync_fd"))
    {
        log_info("cl_khr_external_semaphore_sync_fd is not supported on this "
                 "platoform. Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }

    // The test runs on an out-of-order queue, so that capability must exist.
    cl_command_queue_properties device_props = 0;
    err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
                          sizeof(device_props), &device_props, NULL);
    test_error(err, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
    if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0)
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not "
                 "supported. Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }

    // Obtain pointers to semaphore's API
    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
    GET_PFN(device, clReleaseSemaphoreKHR);

    // Create ooo queue
    clCommandQueueWrapper test_queue = clCreateCommandQueue(
        context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
    test_error(err, "Could not create command queue");

    // Create semaphore
    // Binary semaphore declared exportable as a sync fd.
    // NOTE(review): sema_1/sema_2 are raw handles and leak if a later
    // test_error path returns early; a wrapper type would be safer.
    cl_semaphore_properties_khr sema_1_props[] = {
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_HANDLE_SYNC_FD_KHR),
        static_cast<cl_semaphore_properties_khr>(
            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
        0
    };
    cl_semaphore_khr sema_1 =
        clCreateSemaphoreWithPropertiesKHR(context, sema_1_props, &err);
    test_error(err, "Could not create semaphore");

    // Signal semaphore
    // The semaphore is signalled before the fd is extracted, so the exported
    // fd represents a pending/completed signal operation.
    clEventWrapper signal_event;
    err = clEnqueueSignalSemaphoresKHR(test_queue, 1, &sema_1, nullptr, 0,
                                       nullptr, &signal_event);
    test_error(err, "Could not signal semaphore");

    // Extract sync fd
    int handle = -1;
    size_t handle_size;
    err = clGetSemaphoreHandleForTypeKHR(sema_1, device,
                                         CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
                                         sizeof(handle), &handle, &handle_size);
    test_error(err, "Could not extract semaphore handle");
    test_assert_error(sizeof(handle) == handle_size, "Invalid handle size");
    test_assert_error(handle >= 0, "Invalid handle");

    // Create semaphore from sync fd
    // NOTE(review): presumably the import transfers ownership of the fd to
    // the new semaphore (it is never closed here) — confirm against the
    // cl_khr_external_semaphore_sync_fd specification.
    cl_semaphore_properties_khr sema_2_props[] = {
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
        CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
        static_cast<cl_semaphore_properties_khr>(handle), 0
    };
    cl_semaphore_khr sema_2 =
        clCreateSemaphoreWithPropertiesKHR(context, sema_2_props, &err);
    test_error(err, "Could not create semaphore");

    // Wait semaphore
    // Waiting on the imported semaphore must observe the signal made through
    // the exported one.
    clEventWrapper wait_event;
    err = clEnqueueWaitSemaphoresKHR(test_queue, 1, &sema_2, nullptr, 0,
                                     nullptr, &wait_event);
    test_error(err, "Could not wait semaphore");

    // Finish
    err = clFinish(test_queue);
    test_error(err, "Could not finish queue");

    // Check all events are completed
    test_assert_event_complete(signal_event);
    test_assert_event_complete(wait_event);

    // Release semaphore
    err = clReleaseSemaphoreKHR(sema_1);
    test_error(err, "Could not release semaphore");
    err = clReleaseSemaphoreKHR(sema_2);
    test_error(err, "Could not release semaphore");

    return TEST_PASS;
}

View File

@@ -17,6 +17,7 @@
int main(int argc, const char *argv[]) int main(int argc, const char *argv[])
{ {
return runTestHarness(argc, argv, test_registry::getInstance().num_tests(), return runTestHarness(
argc, argv, static_cast<int>(test_registry::getInstance().num_tests()),
test_registry::getInstance().definitions(), false, 0); test_registry::getInstance().definitions(), false, 0);
} }

View File

@@ -21,81 +21,91 @@
#include "harness/errorHelpers.h" #include "harness/errorHelpers.h"
#include "directx_wrapper.hpp" #include "directx_wrapper.hpp"
class CLDXSemaphoreWrapper { struct DXFenceTestBase
public: {
CLDXSemaphoreWrapper(cl_device_id device, cl_context context, DXFenceTestBase(cl_device_id device, cl_context context,
ID3D12Device* dx_device) cl_command_queue queue, cl_int num_elems)
: device(device), context(context), dx_device(dx_device){}; : device(device), context(context), queue(queue), num_elems(num_elems)
{}
int createSemaphoreFromFence(ID3D12Fence* fence) virtual ~DXFenceTestBase()
{ {
cl_int errcode = CL_SUCCESS;
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
const HRESULT hr = dx_device->CreateSharedHandle(
fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
test_error(FAILED(hr), "Failed to get shared handle from D3D12 fence");
cl_semaphore_properties_khr sem_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
reinterpret_cast<cl_semaphore_properties_khr>(fence_handle), 0
};
semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
test_error(errcode, "Could not create semaphore");
return CL_SUCCESS;
}
~CLDXSemaphoreWrapper()
{
releaseSemaphore();
if (fence_handle) if (fence_handle)
{ {
CloseHandle(fence_handle); CloseHandle(fence_handle);
fence_handle = nullptr;
} }
}; if (fence_wrapper)
const cl_semaphore_khr* operator&() const { return &semaphore; };
cl_semaphore_khr operator*() const { return semaphore; };
HANDLE getHandle() const { return fence_handle; };
private:
cl_semaphore_khr semaphore;
ComPtr<ID3D12Fence> fence;
HANDLE fence_handle;
cl_device_id device;
cl_context context;
ComPtr<ID3D12Device> dx_device;
int releaseSemaphore() const
{ {
GET_PFN(device, clReleaseSemaphoreKHR); delete fence_wrapper;
fence_wrapper = nullptr;
}
if (semaphore) if (semaphore)
{ {
clReleaseSemaphoreKHR(semaphore); clReleaseSemaphoreKHR(semaphore);
semaphore = nullptr;
}
};
virtual int SetUp()
{
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_FUNCTION_EXTENSION_ADDRESS(device,
clCreateSemaphoreWithPropertiesKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clReleaseSemaphoreKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueSignalSemaphoresKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueWaitSemaphoresKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreHandleForTypeKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clRetainSemaphoreKHR);
GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreInfoKHR);
test_error(
!is_import_handle_available(CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
fence_wrapper = new DirectXFenceWrapper(dx_wrapper.getDXDevice());
semaphore = createSemaphoreFromFence(fence_wrapper->get());
test_assert_error(!!semaphore, "Could not create semaphore");
return TEST_PASS;
} }
return CL_SUCCESS; virtual cl_int Run() = 0;
}
};
static bool protected:
is_import_handle_available(cl_device_id device,
const cl_external_memory_handle_type_khr handle_type)
{
int errcode = CL_SUCCESS; int errcode = CL_SUCCESS;
cl_device_id device = nullptr;
cl_context context = nullptr;
cl_command_queue queue = nullptr;
cl_int num_elems = 0;
DirectXWrapper dx_wrapper;
cl_semaphore_payload_khr semaphore_payload = 1;
cl_semaphore_khr semaphore = nullptr;
HANDLE fence_handle = nullptr;
DirectXFenceWrapper *fence_wrapper = nullptr;
clCreateSemaphoreWithPropertiesKHR_fn clCreateSemaphoreWithPropertiesKHR =
nullptr;
clEnqueueSignalSemaphoresKHR_fn clEnqueueSignalSemaphoresKHR = nullptr;
clEnqueueWaitSemaphoresKHR_fn clEnqueueWaitSemaphoresKHR = nullptr;
clReleaseSemaphoreKHR_fn clReleaseSemaphoreKHR = nullptr;
clGetSemaphoreInfoKHR_fn clGetSemaphoreInfoKHR = nullptr;
clRetainSemaphoreKHR_fn clRetainSemaphoreKHR = nullptr;
clGetSemaphoreHandleForTypeKHR_fn clGetSemaphoreHandleForTypeKHR = nullptr;
[[nodiscard]] bool is_import_handle_available(
const cl_external_memory_handle_type_khr handle_type)
{
size_t import_types_size = 0; size_t import_types_size = 0;
errcode = errcode =
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, 0, clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
nullptr, &import_types_size); 0, nullptr, &import_types_size);
if (errcode != CL_SUCCESS) if (errcode != CL_SUCCESS)
{ {
log_error("Could not query import semaphore handle types"); log_error("Could not query import semaphore handle types");
@@ -114,4 +124,46 @@ is_import_handle_available(cl_device_id device,
return std::find(import_types.begin(), import_types.end(), handle_type) return std::find(import_types.begin(), import_types.end(), handle_type)
!= import_types.end(); != import_types.end();
}
cl_semaphore_khr createSemaphoreFromFence(ID3D12Fence *src_fence)
{
const HRESULT hr = dx_wrapper.getDXDevice()->CreateSharedHandle(
src_fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
if (FAILED(hr)) return nullptr;
const cl_semaphore_properties_khr sem_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
reinterpret_cast<cl_semaphore_properties_khr>(fence_handle), 0
};
cl_semaphore_khr tmp_semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
if (errcode != CL_SUCCESS) return nullptr;
return tmp_semaphore;
}
};
template <class T>
int MakeAndRunTest(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int nelems)
{
cl_int status = TEST_PASS;
try
{
auto test_fixture = T(device, context, queue, nelems);
status = test_fixture.SetUp();
if (status != TEST_PASS) return status;
status = test_fixture.Run();
} catch (const std::runtime_error &e)
{
log_error("%s", e.what());
return TEST_FAIL;
}
return status;
} }

View File

@@ -16,37 +16,17 @@
#include "semaphore_dx_fence_base.h" #include "semaphore_dx_fence_base.h"
// Confirm that a signal followed by a wait in OpenCL will complete successfully struct SignalWait final : DXFenceTestBase
REGISTER_TEST(test_external_semaphores_signal_wait)
{ {
int errcode = CL_SUCCESS; using DXFenceTestBase::DXFenceTestBase;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
cl_int Run() override
{
log_info("Calling clEnqueueSignalSemaphoresKHR\n"); log_info("Calling clEnqueueSignalSemaphoresKHR\n");
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
clEventWrapper signal_event; clEventWrapper signal_event;
errcode = clEnqueueSignalSemaphoresKHR( errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore,
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event); &semaphore_payload, 0, nullptr,
&signal_event);
test_error(errcode, "Failed to signal semaphore"); test_error(errcode, "Failed to signal semaphore");
log_info("Calling clEnqueueWaitSemaphoresKHR\n"); log_info("Calling clEnqueueWaitSemaphoresKHR\n");
@@ -63,44 +43,29 @@ REGISTER_TEST(test_external_semaphores_signal_wait)
test_assert_event_complete(wait_event); test_assert_event_complete(wait_event);
return TEST_PASS; return TEST_PASS;
}
};
// Confirm that a signal followed by a wait in OpenCL will complete successfully
REGISTER_TEST(test_external_semaphores_signal_wait)
{
return MakeAndRunTest<SignalWait>(device, context, queue, num_elements);
} }
// Confirm that a wait in OpenCL followed by a CPU signal in DX12 will complete struct SignalDXCPU final : DXFenceTestBase
// successfully
REGISTER_TEST(test_external_semaphores_signal_dx_cpu)
{ {
int errcode = CL_SUCCESS; using DXFenceTestBase::DXFenceTestBase;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
cl_int Run() override
{
log_info("Calling clEnqueueWaitSemaphoresKHR\n"); log_info("Calling clEnqueueWaitSemaphoresKHR\n");
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
clEventWrapper wait_event; clEventWrapper wait_event;
errcode = clEnqueueWaitSemaphoresKHR( errcode = clEnqueueWaitSemaphoresKHR(
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
log_info("Calling d3d12_fence->Signal()\n"); log_info("Calling d3d12_fence->Signal()\n");
const HRESULT hr = (*fence)->Signal(semaphore_payload); const HRESULT hr = fence_wrapper->get()->Signal(semaphore_payload);
test_error(FAILED(hr), "Failed to signal D3D12 fence"); test_error(FAILED(hr), "Failed to signal D3D12 fence");
errcode = clFinish(queue); errcode = clFinish(queue);
@@ -109,45 +74,31 @@ REGISTER_TEST(test_external_semaphores_signal_dx_cpu)
test_assert_event_complete(wait_event); test_assert_event_complete(wait_event);
return TEST_PASS; return TEST_PASS;
}
};
// Confirm that a wait in OpenCL followed by a CPU signal in DX12 will complete
// successfully
REGISTER_TEST(test_external_semaphores_signal_dx_cpu)
{
return MakeAndRunTest<SignalDXCPU>(device, context, queue, num_elements);
} }
// Confirm that a wait in OpenCL followed by a GPU signal in DX12 will complete struct SignalDXGPU final : DXFenceTestBase
// successfully
REGISTER_TEST(test_external_semaphores_signal_dx_gpu)
{ {
int errcode = CL_SUCCESS; using DXFenceTestBase::DXFenceTestBase;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
cl_int Run() override
{
log_info("Calling clEnqueueWaitSemaphoresKHR\n"); log_info("Calling clEnqueueWaitSemaphoresKHR\n");
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
clEventWrapper wait_event; clEventWrapper wait_event;
errcode = clEnqueueWaitSemaphoresKHR( errcode = clEnqueueWaitSemaphoresKHR(
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
log_info("Calling d3d12_command_queue->Signal()\n"); log_info("Calling d3d12_command_queue->Signal()\n");
const HRESULT hr = const HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal(
dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload); fence_wrapper->get(), semaphore_payload);
test_error(FAILED(hr), "Failed to signal D3D12 fence"); test_error(FAILED(hr), "Failed to signal D3D12 fence");
errcode = clFinish(queue); errcode = clFinish(queue);
@@ -156,49 +107,37 @@ REGISTER_TEST(test_external_semaphores_signal_dx_gpu)
test_assert_event_complete(wait_event); test_assert_event_complete(wait_event);
return TEST_PASS; return TEST_PASS;
}
};
// Confirm that a wait in OpenCL followed by a GPU signal in DX12 will complete
// successfully
REGISTER_TEST(test_external_semaphores_signal_dx_gpu)
{
return MakeAndRunTest<SignalDXGPU>(device, context, queue, num_elements);
} }
// Confirm that interlocking waits between OpenCL and DX12 will complete struct CLDXInterlock final : DXFenceTestBase
// successfully
REGISTER_TEST(test_external_semaphores_cl_dx_interlock)
{ {
int errcode = CL_SUCCESS; using DXFenceTestBase::DXFenceTestBase;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
cl_int Run() override
{
log_info("Calling d3d12_command_queue->Wait(1)\n"); log_info("Calling d3d12_command_queue->Wait(1)\n");
cl_semaphore_payload_khr semaphore_payload = 1; HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper->get(),
HRESULT hr = semaphore_payload);
dx_wrapper.getDXCommandQueue()->Wait(*fence, semaphore_payload);
test_error(FAILED(hr), "Failed to wait on D3D12 fence"); test_error(FAILED(hr), "Failed to wait on D3D12 fence");
log_info("Calling d3d12_command_queue->Signal(2)\n"); log_info("Calling d3d12_command_queue->Signal(2)\n");
hr = dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload + 1); hr = dx_wrapper.getDXCommandQueue()->Signal(fence_wrapper->get(),
semaphore_payload + 1);
test_error(FAILED(hr), "Failed to signal D3D12 fence"); test_error(FAILED(hr), "Failed to signal D3D12 fence");
log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n"); log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n");
clEventWrapper signal_event; clEventWrapper signal_event;
errcode = clEnqueueSignalSemaphoresKHR( errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore,
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event); &semaphore_payload, 0, nullptr,
&signal_event);
test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR"); test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR");
log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n"); log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n");
@@ -215,42 +154,53 @@ REGISTER_TEST(test_external_semaphores_cl_dx_interlock)
test_assert_event_complete(signal_event); test_assert_event_complete(signal_event);
return TEST_PASS; return TEST_PASS;
}
};
// Confirm that interlocking waits between OpenCL and DX12 will complete
// successfully
REGISTER_TEST(test_external_semaphores_cl_dx_interlock)
{
return MakeAndRunTest<CLDXInterlock>(device, context, queue, num_elements);
} }
// Confirm that multiple waits in OpenCL followed by signals in DX12 and waits struct MultipleWaitSignal final : DXFenceTestBase
// in DX12 followed by signals in OpenCL complete successfully
REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
{ {
int errcode = CL_SUCCESS; using DXFenceTestBase::DXFenceTestBase;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore"); ~MultipleWaitSignal() override
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); {
if (fence_handle_2)
{
CloseHandle(fence_handle_2);
fence_handle_2 = nullptr;
}
if (fence_wrapper_2)
{
delete fence_wrapper_2;
fence_wrapper_2 = nullptr;
}
if (semaphore_2)
{
clReleaseSemaphoreKHR(semaphore_2);
semaphore_2 = nullptr;
}
DXFenceTestBase::~DXFenceTestBase();
};
// Obtain pointers to semaphore's API int SetUp() override
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); {
GET_PFN(device, clReleaseSemaphoreKHR); DXFenceTestBase::SetUp();
GET_PFN(device, clEnqueueSignalSemaphoresKHR); fence_wrapper_2 = new DirectXFenceWrapper(dx_wrapper.getDXDevice());
GET_PFN(device, clEnqueueWaitSemaphoresKHR); semaphore_2 = createSemaphoreFromFence(fence_wrapper_2->get());
test_assert_error(!!semaphore_2, "Could not create semaphore");
test_error(!is_import_handle_available(device, return TEST_PASS;
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), }
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL cl_int Run() override
const DirectXFenceWrapper fence_1(dx_wrapper.getDXDevice()); {
CLDXSemaphoreWrapper semaphore_1(device, context, dx_wrapper.getDXDevice()); const cl_semaphore_khr semaphore_list[] = { semaphore, semaphore_2 };
test_error(semaphore_1.createSemaphoreFromFence(*fence_1),
"Could not create semaphore");
const DirectXFenceWrapper fence_2(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore_2(device, context, dx_wrapper.getDXDevice());
test_error(semaphore_2.createSemaphoreFromFence(*fence_2),
"Could not create semaphore");
const cl_semaphore_khr semaphore_list[] = { *semaphore_1, *semaphore_2 };
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
cl_semaphore_payload_khr semaphore_payload_list[] = { cl_semaphore_payload_khr semaphore_payload_list[] = {
semaphore_payload, semaphore_payload + 1 semaphore_payload, semaphore_payload + 1
}; };
@@ -263,16 +213,20 @@ REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
log_info("Calling d3d12_command_queue->Signal()\n"); log_info("Calling d3d12_command_queue->Signal()\n");
HRESULT hr = HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal(
dx_wrapper.getDXCommandQueue()->Signal(*fence_2, semaphore_payload + 1); fence_wrapper_2->get(), semaphore_payload + 1);
test_error(FAILED(hr), "Failed to signal D3D12 fence 2"); test_error(FAILED(hr), "Failed to signal D3D12 fence 2");
hr = dx_wrapper.getDXCommandQueue()->Signal(*fence_1, semaphore_payload); hr = dx_wrapper.getDXCommandQueue()->Signal(fence_wrapper->get(),
semaphore_payload);
test_error(FAILED(hr), "Failed to signal D3D12 fence 1"); test_error(FAILED(hr), "Failed to signal D3D12 fence 1");
log_info("Calling d3d12_command_queue->Wait() with different payloads\n"); log_info(
hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_1, semaphore_payload + 3); "Calling d3d12_command_queue->Wait() with different payloads\n");
hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper->get(),
semaphore_payload + 3);
test_error(FAILED(hr), "Failed to wait on D3D12 fence 1"); test_error(FAILED(hr), "Failed to wait on D3D12 fence 1");
hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_2, semaphore_payload + 2); hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper_2->get(),
semaphore_payload + 2);
test_error(FAILED(hr), "Failed to wait on D3D12 fence 2"); test_error(FAILED(hr), "Failed to wait on D3D12 fence 2");
errcode = clFinish(queue); errcode = clFinish(queue);
@@ -286,29 +240,31 @@ REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
log_info("Calling clEnqueueSignalSemaphoresKHR\n"); log_info("Calling clEnqueueSignalSemaphoresKHR\n");
clEventWrapper signal_event; clEventWrapper signal_event;
errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list, errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list,
semaphore_payload_list, 0, nullptr, semaphore_payload_list, 0,
&signal_event); nullptr, &signal_event);
test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR"); test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR");
// Wait until the GPU has completed commands up to this fence point. // Wait until the GPU has completed commands up to this fence point.
log_info("Waiting for D3D12 command queue completion\n"); log_info("Waiting for D3D12 command queue completion\n");
if ((*fence_1)->GetCompletedValue() < semaphore_payload_list[0]) if (fence_wrapper->get()->GetCompletedValue()
< semaphore_payload_list[0])
{ {
const HANDLE event_handle = const HANDLE event_handle =
CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS); CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
hr = (*fence_1)->SetEventOnCompletion(semaphore_payload_list[0], hr = fence_wrapper->get()->SetEventOnCompletion(
event_handle); semaphore_payload_list[0], event_handle);
test_error(FAILED(hr), test_error(FAILED(hr),
"Failed to set D3D12 fence 1 event on completion"); "Failed to set D3D12 fence 1 event on completion");
WaitForSingleObject(event_handle, INFINITE); WaitForSingleObject(event_handle, INFINITE);
CloseHandle(event_handle); CloseHandle(event_handle);
} }
if ((*fence_2)->GetCompletedValue() < semaphore_payload_list[1]) if (fence_wrapper_2->get()->GetCompletedValue()
< semaphore_payload_list[1])
{ {
const HANDLE event_handle = const HANDLE event_handle =
CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS); CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
hr = (*fence_2)->SetEventOnCompletion(semaphore_payload_list[1], hr = fence_wrapper_2->get()->SetEventOnCompletion(
event_handle); semaphore_payload_list[1], event_handle);
test_error(FAILED(hr), test_error(FAILED(hr),
"Failed to set D3D12 fence 2 event on completion"); "Failed to set D3D12 fence 2 event on completion");
WaitForSingleObject(event_handle, INFINITE); WaitForSingleObject(event_handle, INFINITE);
@@ -321,4 +277,18 @@ REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
test_assert_event_complete(signal_event); test_assert_event_complete(signal_event);
return TEST_PASS; return TEST_PASS;
}
protected:
cl_semaphore_khr semaphore_2 = nullptr;
HANDLE fence_handle_2 = nullptr;
DirectXFenceWrapper *fence_wrapper_2 = nullptr;
};
// Confirm that multiple waits in OpenCL followed by signals in DX12 and waits
// in DX12 followed by signals in OpenCL complete successfully
REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
{
return MakeAndRunTest<MultipleWaitSignal>(device, context, queue,
num_elements);
} }

View File

@@ -16,28 +16,16 @@
#include "semaphore_dx_fence_base.h" #include "semaphore_dx_fence_base.h"
// Confirm that a wait followed by a signal in DirectX 12 using an exported struct ExportDXSignal final : DXFenceTestBase
// semaphore will complete successfully
REGISTER_TEST(test_external_semaphores_export_dx_signal)
{ {
int errcode = CL_SUCCESS; using DXFenceTestBase::DXFenceTestBase;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
GET_PFN(device, clGetSemaphoreInfoKHR);
GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
int Run() override
{
size_t export_types_size = 0; size_t export_types_size = 0;
errcode = errcode =
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0, clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
nullptr, &export_types_size); 0, nullptr, &export_types_size);
test_error(errcode, "Could not query export semaphore handle types"); test_error(errcode, "Could not query export semaphore handle types");
std::vector<cl_external_semaphore_handle_type_khr> export_types( std::vector<cl_external_semaphore_handle_type_khr> export_types(
export_types_size / sizeof(cl_external_semaphore_handle_type_khr)); export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
@@ -50,14 +38,16 @@ REGISTER_TEST(test_external_semaphores_export_dx_signal)
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR) CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
== export_types.end()) == export_types.end())
{ {
log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between " log_info(
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
"the supported export types\n"); "the supported export types\n");
return TEST_FAIL; return TEST_FAIL;
} }
constexpr cl_semaphore_properties_khr sem_props[] = { constexpr cl_semaphore_properties_khr sem_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR), static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR), static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>( static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
static_cast<cl_semaphore_properties_khr>( static_cast<cl_semaphore_properties_khr>(
@@ -66,37 +56,37 @@ REGISTER_TEST(test_external_semaphores_export_dx_signal)
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR), CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
0 0
}; };
cl_semaphore_khr semaphore = cl_semaphore_khr exportable_semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode); clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
test_error(errcode, "Could not create semaphore"); test_error(errcode, "Could not create semaphore");
cl_bool is_exportable = CL_FALSE; cl_bool is_exportable = CL_FALSE;
errcode = errcode = clGetSemaphoreInfoKHR(
clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR, exportable_semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
sizeof(is_exportable), &is_exportable, nullptr); sizeof(is_exportable), &is_exportable, nullptr);
test_error(errcode, "Could not get semaphore info"); test_error(errcode, "Could not get semaphore info");
test_error(!is_exportable, "Semaphore is not exportable"); test_error(!is_exportable, "Semaphore is not exportable");
log_info("Calling clEnqueueWaitSemaphoresKHR\n"); log_info("Calling clEnqueueWaitSemaphoresKHR\n");
constexpr cl_semaphore_payload_khr semaphore_payload = 1;
clEventWrapper wait_event; clEventWrapper wait_event;
errcode = clEnqueueWaitSemaphoresKHR( errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &exportable_semaphore,
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); &semaphore_payload, 0, nullptr,
&wait_event);
test_error(errcode, "Failed to wait semaphore"); test_error(errcode, "Failed to wait semaphore");
HANDLE semaphore_handle = nullptr; HANDLE semaphore_handle = nullptr;
errcode = clGetSemaphoreHandleForTypeKHR( errcode = clGetSemaphoreHandleForTypeKHR(
semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR, exportable_semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
sizeof(semaphore_handle), &semaphore_handle, nullptr); sizeof(semaphore_handle), &semaphore_handle, nullptr);
test_error(errcode, "Could not get semaphore handle"); test_error(errcode, "Could not get semaphore handle");
ID3D12Fence *fence = nullptr; ID3D12Fence *exported_fence = nullptr;
errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle, errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(
IID_PPV_ARGS(&fence)); semaphore_handle, IID_PPV_ARGS(&exported_fence));
test_error(errcode, "Could not open semaphore handle"); test_error(errcode, "Could not open semaphore handle");
log_info("Calling fence->Signal()\n"); log_info("Calling fence->Signal()\n");
const HRESULT hr = fence->Signal(semaphore_payload); const HRESULT hr = exported_fence->Signal(semaphore_payload);
test_error(FAILED(hr), "Failed to signal D3D12 fence"); test_error(FAILED(hr), "Failed to signal D3D12 fence");
errcode = clFinish(queue); errcode = clFinish(queue);
@@ -106,34 +96,31 @@ REGISTER_TEST(test_external_semaphores_export_dx_signal)
// Release resources // Release resources
CloseHandle(semaphore_handle); CloseHandle(semaphore_handle);
test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore"); test_error(clReleaseSemaphoreKHR(exportable_semaphore),
fence->Release(); "Could not release semaphore");
exported_fence->Release();
return TEST_PASS; return TEST_PASS;
}
};
// Confirm that a wait followed by a signal in DirectX 12 using an exported
// semaphore will complete successfully
REGISTER_TEST(test_external_semaphores_export_dx_signal)
{
return MakeAndRunTest<ExportDXSignal>(device, context, queue, num_elements);
} }
// Confirm that a signal in OpenCL followed by a wait in DirectX 12 using an struct ExportDXWait final : DXFenceTestBase
// exported semaphore will complete successfully
REGISTER_TEST(test_external_semaphores_export_dx_wait)
{ {
int errcode = CL_SUCCESS; using DXFenceTestBase::DXFenceTestBase;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
GET_PFN(device, clGetSemaphoreInfoKHR);
GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
int Run() override
{
size_t export_types_size = 0; size_t export_types_size = 0;
errcode = errcode =
clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0, clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
nullptr, &export_types_size); 0, nullptr, &export_types_size);
test_error(errcode, "Could not query export semaphore handle types"); test_error(errcode, "Could not query export semaphore handle types");
std::vector<cl_external_semaphore_handle_type_khr> export_types( std::vector<cl_external_semaphore_handle_type_khr> export_types(
export_types_size / sizeof(cl_external_semaphore_handle_type_khr)); export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
@@ -146,14 +133,16 @@ REGISTER_TEST(test_external_semaphores_export_dx_wait)
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR) CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
== export_types.end()) == export_types.end())
{ {
log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between " log_info(
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
"the supported export types\n"); "the supported export types\n");
return TEST_FAIL; return TEST_FAIL;
} }
constexpr cl_semaphore_properties_khr sem_props[] = { constexpr cl_semaphore_properties_khr sem_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR), static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR), static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
static_cast<cl_semaphore_properties_khr>( static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
static_cast<cl_semaphore_properties_khr>( static_cast<cl_semaphore_properties_khr>(
@@ -162,13 +151,13 @@ REGISTER_TEST(test_external_semaphores_export_dx_wait)
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR), CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
0 0
}; };
cl_semaphore_khr semaphore = cl_semaphore_khr exportable_semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode); clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
test_error(errcode, "Could not create semaphore"); test_error(errcode, "Could not create semaphore");
cl_bool is_exportable = CL_FALSE; cl_bool is_exportable = CL_FALSE;
errcode = errcode = clGetSemaphoreInfoKHR(
clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR, exportable_semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
sizeof(is_exportable), &is_exportable, nullptr); sizeof(is_exportable), &is_exportable, nullptr);
test_error(errcode, "Could not get semaphore info"); test_error(errcode, "Could not get semaphore info");
test_error(!is_exportable, "Semaphore is not exportable"); test_error(!is_exportable, "Semaphore is not exportable");
@@ -176,31 +165,33 @@ REGISTER_TEST(test_external_semaphores_export_dx_wait)
log_info("Calling clEnqueueSignalSemaphoresKHR\n"); log_info("Calling clEnqueueSignalSemaphoresKHR\n");
constexpr cl_semaphore_payload_khr semaphore_payload = 1; constexpr cl_semaphore_payload_khr semaphore_payload = 1;
clEventWrapper signal_event; clEventWrapper signal_event;
errcode = clEnqueueSignalSemaphoresKHR( errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &exportable_semaphore,
queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event); &semaphore_payload, 0, nullptr,
&signal_event);
test_error(errcode, "Failed to signal semaphore"); test_error(errcode, "Failed to signal semaphore");
HANDLE semaphore_handle = nullptr; HANDLE semaphore_handle = nullptr;
errcode = clGetSemaphoreHandleForTypeKHR( errcode = clGetSemaphoreHandleForTypeKHR(
semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR, exportable_semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
sizeof(semaphore_handle), &semaphore_handle, nullptr); sizeof(semaphore_handle), &semaphore_handle, nullptr);
test_error(errcode, "Could not get semaphore handle"); test_error(errcode, "Could not get semaphore handle");
ID3D12Fence *fence = nullptr; ID3D12Fence *exported_fence = nullptr;
errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle, errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(
IID_PPV_ARGS(&fence)); semaphore_handle, IID_PPV_ARGS(&exported_fence));
test_error(errcode, "Could not open semaphore handle"); test_error(errcode, "Could not open semaphore handle");
log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n"); log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n");
HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence, semaphore_payload); HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(exported_fence,
semaphore_payload);
test_error(FAILED(hr), "Failed to wait on D3D12 fence"); test_error(FAILED(hr), "Failed to wait on D3D12 fence");
log_info("Calling WaitForSingleObject\n"); log_info("Calling WaitForSingleObject\n");
if (fence->GetCompletedValue() < semaphore_payload) if (exported_fence->GetCompletedValue() < semaphore_payload)
{ {
const HANDLE event = const HANDLE event =
CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS); CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
hr = fence->SetEventOnCompletion(semaphore_payload, event); hr = exported_fence->SetEventOnCompletion(semaphore_payload, event);
test_error(FAILED(hr), "Failed to set event on completion"); test_error(FAILED(hr), "Failed to set event on completion");
WaitForSingleObject(event, INFINITE); WaitForSingleObject(event, INFINITE);
CloseHandle(event); CloseHandle(event);
@@ -213,8 +204,17 @@ REGISTER_TEST(test_external_semaphores_export_dx_wait)
// Release resources // Release resources
CloseHandle(semaphore_handle); CloseHandle(semaphore_handle);
test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore"); test_error(clReleaseSemaphoreKHR(exportable_semaphore),
fence->Release(); "Could not release semaphore");
exported_fence->Release();
return TEST_PASS; return TEST_PASS;
}
};
// Confirm that a signal in OpenCL followed by a wait in DirectX 12 using an
// exported semaphore will complete successfully
REGISTER_TEST(test_external_semaphores_export_dx_wait)
{
return MakeAndRunTest<ExportDXWait>(device, context, queue, num_elements);
} }

View File

@@ -16,32 +16,12 @@
#include "semaphore_dx_fence_base.h" #include "semaphore_dx_fence_base.h"
// Confirm that a wait without a semaphore payload list will return struct DXFenceNegativeWait final : DXFenceTestBase
// CL_INVALID_VALUE
REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
{ {
int errcode = CL_SUCCESS; using DXFenceTestBase::DXFenceTestBase;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
int Run() override
{
log_info("Calling clEnqueueWaitSemaphoresKHR\n"); log_info("Calling clEnqueueWaitSemaphoresKHR\n");
errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0, errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
nullptr, nullptr); nullptr, nullptr);
@@ -50,34 +30,23 @@ REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
"Unexpected error code returned from clEnqueueWaitSemaphores"); "Unexpected error code returned from clEnqueueWaitSemaphores");
return TEST_PASS; return TEST_PASS;
}
};
// Confirm that a wait without a semaphore payload list will return
// CL_INVALID_VALUE
REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
{
return MakeAndRunTest<DXFenceNegativeWait>(device, context, queue,
num_elements);
} }
// Confirm that a signal without a semaphore payload list will return struct DXFenceNegativeSignal final : DXFenceTestBase
// CL_INVALID_VALUE
REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
{ {
int errcode = CL_SUCCESS; using DXFenceTestBase::DXFenceTestBase;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
int Run() override
{
log_info("Calling clEnqueueWaitSemaphoresKHR\n"); log_info("Calling clEnqueueWaitSemaphoresKHR\n");
errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0, errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
nullptr, nullptr); nullptr, nullptr);
@@ -86,4 +55,13 @@ REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
"Unexpected error code returned from clEnqueueSignalSemaphores"); "Unexpected error code returned from clEnqueueSignalSemaphores");
return TEST_PASS; return TEST_PASS;
}
};
// Confirm that a signal without a semaphore payload list will return
// CL_INVALID_VALUE
REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
{
return MakeAndRunTest<DXFenceNegativeSignal>(device, context, queue,
num_elements);
} }

View File

@@ -16,39 +16,19 @@
#include "semaphore_dx_fence_base.h" #include "semaphore_dx_fence_base.h"
// Confirm that the CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR property is in the struct DXFenceQueryProperties final : DXFenceTestBase
// properties returned by clGetSemaphoreInfo
REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
{ {
int errcode = CL_SUCCESS; using DXFenceTestBase::DXFenceTestBase;
const DirectXWrapper dx_wrapper;
REQUIRE_EXTENSION("cl_khr_external_semaphore");
REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
// Obtain pointers to semaphore's API
GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
GET_PFN(device, clReleaseSemaphoreKHR);
GET_PFN(device, clGetSemaphoreInfoKHR);
test_error(!is_import_handle_available(device,
CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
"Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
"supported import types");
// Import D3D12 fence into OpenCL
const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
test_error(semaphore.createSemaphoreFromFence(*fence),
"Could not create semaphore");
int Run() override
{
size_t properties_size_bytes = 0; size_t properties_size_bytes = 0;
errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR, 0, errcode = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
nullptr, &properties_size_bytes); 0, nullptr, &properties_size_bytes);
test_error(errcode, "Could not get semaphore info"); test_error(errcode, "Could not get semaphore info");
std::vector<cl_semaphore_properties_khr> semaphore_properties( std::vector<cl_semaphore_properties_khr> semaphore_properties(
properties_size_bytes / sizeof(cl_semaphore_properties_khr)); properties_size_bytes / sizeof(cl_semaphore_properties_khr));
errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR, errcode = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
properties_size_bytes, properties_size_bytes,
semaphore_properties.data(), nullptr); semaphore_properties.data(), nullptr);
test_error(errcode, "Could not get semaphore info"); test_error(errcode, "Could not get semaphore info");
@@ -58,12 +38,21 @@ REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR
&& semaphore_properties[i + 1] && semaphore_properties[i + 1]
== reinterpret_cast<cl_semaphore_properties_khr>( == reinterpret_cast<cl_semaphore_properties_khr>(
semaphore.getHandle())) fence_handle))
{ {
return TEST_PASS; return TEST_PASS;
} }
} }
log_error( log_error("Failed to find the dx fence handle type in the semaphore "
"Failed to find the dx fence handle type in the semaphore properties"); "properties");
return TEST_FAIL; return TEST_FAIL;
}
};
// Confirm that the CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR property is in the
// properties returned by clGetSemaphoreInfo
REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
{
return MakeAndRunTest<DXFenceQueryProperties>(device, context, queue,
num_elements);
} }

View File

@@ -9,6 +9,7 @@ set(${MODULE_NAME}_SOURCES
test_semaphores_negative_create.cpp test_semaphores_negative_create.cpp
test_semaphores_cross_queue.cpp test_semaphores_cross_queue.cpp
test_semaphores_queries.cpp test_semaphores_queries.cpp
test_semaphores_payload.cpp
semaphore_base.h semaphore_base.h
) )

View File

@@ -23,6 +23,7 @@
#include "harness/deviceInfo.h" #include "harness/deviceInfo.h"
#include "harness/testHarness.h" #include "harness/testHarness.h"
#include "harness/typeWrappers.h" #include "harness/typeWrappers.h"
#include "harness/extensionHelpers.h"
struct SemaphoreBase struct SemaphoreBase
{ {
@@ -37,27 +38,15 @@ struct SemaphoreBase
test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
// If it is supported get the addresses of all the APIs here. // If it is supported get the addresses of all the APIs here.
// clang-format off GET_FUNCTION_EXTENSION_ADDRESS(device,
#define GET_EXTENSION_ADDRESS(FUNC) \ clCreateSemaphoreWithPropertiesKHR);
FUNC = reinterpret_cast<FUNC##_fn>( \ GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueSignalSemaphoresKHR);
clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueWaitSemaphoresKHR);
if (FUNC == nullptr) \ GET_FUNCTION_EXTENSION_ADDRESS(device, clReleaseSemaphoreKHR);
{ \ GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreInfoKHR);
log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ GET_FUNCTION_EXTENSION_ADDRESS(device, clRetainSemaphoreKHR);
" with " #FUNC "\n"); \ GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreHandleForTypeKHR);
return TEST_FAIL; \
}
// clang-format on
GET_EXTENSION_ADDRESS(clCreateSemaphoreWithPropertiesKHR);
GET_EXTENSION_ADDRESS(clEnqueueSignalSemaphoresKHR);
GET_EXTENSION_ADDRESS(clEnqueueWaitSemaphoresKHR);
GET_EXTENSION_ADDRESS(clReleaseSemaphoreKHR);
GET_EXTENSION_ADDRESS(clGetSemaphoreInfoKHR);
GET_EXTENSION_ADDRESS(clRetainSemaphoreKHR);
GET_EXTENSION_ADDRESS(clGetSemaphoreHandleForTypeKHR);
#undef GET_EXTENSION_ADDRESS
return CL_SUCCESS; return CL_SUCCESS;
} }

View File

@@ -14,14 +14,8 @@
// limitations under the License. // limitations under the License.
// //
#include <thread>
#include "semaphore_base.h" #include "semaphore_base.h"
#include "semaphore_base.h"
#define FLUSH_DELAY_S 5
namespace { namespace {
const char* source = "__kernel void empty() {}"; const char* source = "__kernel void empty() {}";

View File

@@ -0,0 +1,86 @@
//
// Copyright (c) 2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "semaphore_base.h"
namespace {
struct PayloadSemaphore : public SemaphoreTestBase
{
clSemaphoreWrapper sema_sec;
PayloadSemaphore(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int nelems)
: SemaphoreTestBase(device, context, queue, nelems), sema_sec(this)
{}
cl_int Run() override
{
cl_int err = CL_SUCCESS;
// Create semaphore
cl_semaphore_properties_khr sema_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
0
};
semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
test_error(err, "Could not create semaphore");
sema_sec =
clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
test_error(err, "Could not create semaphore");
{
cl_semaphore_payload_khr payload_list[] = { 1, 2 };
cl_semaphore_khr semaphores[2] = { semaphore, sema_sec };
// Signal semaphore
err = clEnqueueSignalSemaphoresKHR(
queue, 2, semaphores, payload_list, 0, nullptr, nullptr);
test_error(err, "Could not signal semaphore");
}
{
cl_semaphore_payload_khr payload_list[] = { 3, 4 };
cl_semaphore_khr semaphores[2] = { semaphore, sema_sec };
// Wait semaphore
err = clEnqueueWaitSemaphoresKHR(queue, 2, semaphores, payload_list,
0, nullptr, nullptr);
test_error(err, "Could not wait semaphore");
}
// Finish
err = clFinish(queue);
test_error(err, "Could not finish queue");
return CL_SUCCESS;
}
};
} // anonymous namespace
// Confirm that a valid semaphore payload values list will be ignored if no
// semaphores in the list of sema_objects require a payload
REGISTER_TEST_VERSION(semaphores_payload, Version(1, 2))
{
return MakeAndRunTest<PayloadSemaphore>(device, context, queue,
num_elements);
}

View File

@@ -336,9 +336,8 @@ int test_cl_image_write(cl_context context, cl_command_queue queue,
get_explicit_type_name(*outType), suffix, convert); get_explicit_type_name(*outType), suffix, convert);
programPtr = kernelSource; programPtr = kernelSource;
if (create_single_kernel_helper_with_build_options( if (create_single_kernel_helper(context, &program, &kernel, 1,
context, &program, &kernel, 1, (const char **)&programPtr, (const char **)&programPtr, "sample_test"))
"sample_test", ""))
{ {
return -1; return -1;
} }

View File

@@ -629,7 +629,7 @@ int image_from_small_buffer_negative(cl_device_id device, cl_context context,
clCreateImage(context, flag, &format, &image_desc, nullptr, clCreateImage(context, flag, &format, &image_desc, nullptr,
&err); &err);
test_failure_error(err, CL_INVALID_MEM_OBJECT, test_failure_error(err, CL_INVALID_IMAGE_SIZE,
"Unexpected clCreateImage return"); "Unexpected clCreateImage return");
err = clReleaseMemObject(buffer); err = clReleaseMemObject(buffer);

View File

@@ -859,7 +859,9 @@ double reference_add(double x, double y)
__m128 vb = _mm_set_ss((float)b); __m128 vb = _mm_set_ss((float)b);
va = _mm_add_ss(va, vb); va = _mm_add_ss(va, vb);
_mm_store_ss((float *)&a, va); _mm_store_ss((float *)&a, va);
#elif defined(__PPC__) #elif defined(__PPC__) || defined(__riscv)
// RISC-V CPUs with default 'f' fp32 extension do not support any way to
// enable/disable FTZ mode, subnormals are always handled without flushing.
// Most Power host CPUs do not support the non-IEEE mode (NI) which flushes // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
// denorm's to zero. As such, the reference add with FTZ must be emulated in // denorm's to zero. As such, the reference add with FTZ must be emulated in
// sw. // sw.
@@ -876,7 +878,7 @@ double reference_add(double x, double y)
} ub; } ub;
ub.d = b; ub.d = b;
cl_uint mantA, mantB; cl_uint mantA, mantB;
cl_ulong addendA, addendB, sum; cl_ulong addendA, addendB;
int expA = extractf(a, &mantA); int expA = extractf(a, &mantA);
int expB = extractf(b, &mantB); int expB = extractf(b, &mantB);
cl_uint signA = ua.u & 0x80000000U; cl_uint signA = ua.u & 0x80000000U;
@@ -972,7 +974,7 @@ double reference_multiply(double x, double y)
__m128 vb = _mm_set_ss((float)b); __m128 vb = _mm_set_ss((float)b);
va = _mm_mul_ss(va, vb); va = _mm_mul_ss(va, vb);
_mm_store_ss((float *)&a, va); _mm_store_ss((float *)&a, va);
#elif defined(__PPC__) #elif defined(__PPC__) || defined(__riscv)
// Most Power host CPUs do not support the non-IEEE mode (NI) which flushes // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
// denorm's to zero. As such, the reference multiply with FTZ must be // denorm's to zero. As such, the reference multiply with FTZ must be
// emulated in sw. // emulated in sw.
@@ -3351,7 +3353,7 @@ long double reference_cbrtl(long double x)
long double reference_rintl(long double x) long double reference_rintl(long double x)
{ {
#if defined(__PPC__) #if defined(__PPC__) || defined(__riscv)
// On PPC, long doubles are maintained as 2 doubles. Therefore, the combined // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
// mantissa can represent more than LDBL_MANT_DIG binary digits. // mantissa can represent more than LDBL_MANT_DIG binary digits.
x = rintl(x); x = rintl(x);

View File

@@ -557,8 +557,9 @@ int TestNonUniformWorkGroup::prepareDevice () {
if (_testRange & Range::BARRIERS) if (_testRange & Range::BARRIERS)
buildOptions += " -D TESTBARRIERS"; buildOptions += " -D TESTBARRIERS";
err = create_single_kernel_helper_with_build_options (_context, &_program, &_testKernel, 1, err = create_single_kernel_helper(_context, &_program, &_testKernel, 1,
&KERNEL_FUNCTION, "testKernel", buildOptions.c_str()); &KERNEL_FUNCTION, "testKernel",
buildOptions.c_str());
if (err) if (err)
{ {
log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__); log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__);
@@ -842,8 +843,9 @@ int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
if (testRange & Range::BARRIERS) if (testRange & Range::BARRIERS)
buildOptions += " -D TESTBARRIERS"; buildOptions += " -D TESTBARRIERS";
err = create_single_kernel_helper_with_build_options (_context, &program, &testKernel, 1, err = create_single_kernel_helper(_context, &program, &testKernel, 1,
&KERNEL_FUNCTION, "testKernel", buildOptions.c_str()); &KERNEL_FUNCTION, "testKernel",
buildOptions.c_str());
if (err) if (err)
{ {
log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__); log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__);

View File

@@ -63,8 +63,8 @@ REGISTER_TEST(pipe_info)
log_info( " CL_PIPE_MAX_PACKETS passed.\n" ); log_info( " CL_PIPE_MAX_PACKETS passed.\n" );
} }
err = create_single_kernel_helper_with_build_options( err = create_single_kernel_helper(context, &program, &kernel, 1,
context, &program, &kernel, 1, &pipe_kernel_code, "pipe_kernel", &pipe_kernel_code, "pipe_kernel",
"-cl-std=CL2.0 -cl-kernel-arg-info"); "-cl-std=CL2.0 -cl-kernel-arg-info");
test_error_fail(err, "Error creating program"); test_error_fail(err, "Error creating program");

View File

@@ -70,14 +70,15 @@ struct printDataGenParameters
{ {
std::vector<std::string> genericFormats; std::vector<std::string> genericFormats;
const char* dataRepresentation; const char* dataRepresentation;
const char* vectorFormatFlag; const char* vectorFormatFlag = nullptr;
const char* vectorFormatSpecifier; const char* vectorFormatSpecifier = nullptr;
const char* dataType; const char* dataType = nullptr;
const char* vectorSize; const char* vectorSize = nullptr;
const char* addrSpaceArgumentTypeQualifier; const char* addrSpaceArgumentTypeQualifier = nullptr;
const char* addrSpaceVariableTypeQualifier; const char* addrSpaceVariableTypeQualifier = nullptr;
const char* addrSpaceParameter; const char* addrSpaceParameter = nullptr;
const char* addrSpacePAdd; const char* addrSpacePAdd = nullptr;
bool allowFallbackTest = false;
}; };
// Reference results - filled out at run-time // Reference results - filled out at run-time
@@ -111,6 +112,9 @@ struct testCase
char*, char*,
const size_t); //function pointer for generating reference results const size_t); //function pointer for generating reference results
Type dataType; //the data type that will be printed during reference result generation (used for setting rounding mode) Type dataType; //the data type that will be printed during reference result generation (used for setting rounding mode)
bool (*fallbackTestFN)(const char*,
const char*) =
nullptr; // function pointer to perform fallback test if required
}; };
extern const char* strType[]; extern const char* strType[];

View File

@@ -26,8 +26,11 @@ static void intRefBuilder(printDataGenParameters&, char*, const size_t);
static void halfRefBuilder(printDataGenParameters&, char* rResult, static void halfRefBuilder(printDataGenParameters&, char* rResult,
const size_t); const size_t);
static void floatRefBuilder(printDataGenParameters&, char* rResult, const size_t); static void floatRefBuilder(printDataGenParameters&, char* rResult, const size_t);
static bool floatRefTest(const char* refResult, const char* analysisBuffer);
static void doubleRefBuilder(printDataGenParameters&, char* rResult, static void doubleRefBuilder(printDataGenParameters&, char* rResult,
const size_t); const size_t);
static bool doubleRefTest(const char* refResult, const char* analysisBuffer);
static void octalRefBuilder(printDataGenParameters&, char*, const size_t); static void octalRefBuilder(printDataGenParameters&, char*, const size_t);
static void unsignedRefBuilder(printDataGenParameters&, char*, const size_t); static void unsignedRefBuilder(printDataGenParameters&, char*, const size_t);
static void hexRefBuilder(printDataGenParameters&, char*, const size_t); static void hexRefBuilder(printDataGenParameters&, char*, const size_t);
@@ -468,12 +471,12 @@ std::vector<printDataGenParameters> printFloatGenParameters = {
// Double argument representing floating-point,in [-]xh.hhhhpAd style // Double argument representing floating-point,in [-]xh.hhhhpAd style
{ { "%.6a" }, "0.1f" }, { { "%.6a" }, "0.5f", 0, 0, 0, 0, 0, 0, 0, 0, true },
//(Minimum)Ten-wide,Double argument representing floating-point,in //(Minimum)Ten-wide,Double argument representing floating-point,in
// xh.hhhhpAd style,default(right)-justified // xh.hhhhpAd style,default(right)-justified
{ { "%10.2a" }, "9990.235f" }, { { "%10.2a" }, "1.5f", 0, 0, 0, 0, 0, 0, 0, 0, true },
//(Minimum)Ten-wide,two positions after the decimal,with //(Minimum)Ten-wide,two positions after the decimal,with
// a blank space inserted before the value, default(right)-justified // a blank space inserted before the value, default(right)-justified
@@ -502,8 +505,9 @@ testCase testCaseFloat = {
floatRefBuilder, floatRefBuilder,
kfloat kfloat,
floatRefTest
}; };
//============================================== //==============================================
@@ -673,12 +677,12 @@ std::vector<printDataGenParameters> printDoubleGenParameters = {
// Double argument representing floating-point,in [-]xh.hhhhpAd style // Double argument representing floating-point,in [-]xh.hhhhpAd style
{ { "%.6a" }, "0.1" }, { { "%.6a" }, "0.5", 0, 0, 0, 0, 0, 0, 0, 0, true },
//(Minimum)Ten-wide,Double argument representing floating-point,in //(Minimum)Ten-wide,Double argument representing floating-point,in
// xh.hhhhpAd style,default(right)-justified // xh.hhhhpAd style,default(right)-justified
{ { "%10.2a" }, "9990.235" }, { { "%10.2a" }, "1.5", 0, 0, 0, 0, 0, 0, 0, 0, true },
}; };
//--------------------------------------------------------- //---------------------------------------------------------
@@ -697,8 +701,9 @@ testCase testCaseDouble = {
doubleRefBuilder, doubleRefBuilder,
kdouble kdouble,
doubleRefTest
}; };
//============================================== //==============================================
@@ -1032,6 +1037,9 @@ testCase testCaseChar = {
std::vector<printDataGenParameters> printStringGenParameters = { std::vector<printDataGenParameters> printStringGenParameters = {
// empty format, no data representation
{ {""} },
// empty format // empty format
{ {""}, "\"foo\"" }, { {""}, "\"foo\"" },
@@ -1094,6 +1102,8 @@ std::vector<std::string> correctBufferString = {
"", "",
"",
" foo", " foo",
"f", "f",
@@ -1752,7 +1762,15 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
return !std::regex_match(analysisBuffer, nanRegex); return !std::regex_match(analysisBuffer, nanRegex);
} }
return strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str()); size_t ret =
strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str());
if (ret != 0 && pTestCase->_genParameters[testId].allowFallbackTest
&& pTestCase->fallbackTestFN)
if (pTestCase->fallbackTestFN(
analysisBuffer, pTestCase->_correctBuffer[testId].c_str()))
return 0;
return ret;
} }
static void intRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize) static void intRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
@@ -1776,6 +1794,13 @@ static void floatRefBuilder(printDataGenParameters& params, char* refResult, con
strtof(params.dataRepresentation, NULL)); strtof(params.dataRepresentation, NULL));
} }
static bool floatRefTest(const char* refResult, const char* analysisBuffer)
{
float test = strtof(analysisBuffer, NULL);
float expected = strtof(refResult, NULL);
return test == expected;
}
static void doubleRefBuilder(printDataGenParameters& params, char* refResult, static void doubleRefBuilder(printDataGenParameters& params, char* refResult,
const size_t refSize) const size_t refSize)
{ {
@@ -1783,6 +1808,13 @@ static void doubleRefBuilder(printDataGenParameters& params, char* refResult,
strtod(params.dataRepresentation, NULL)); strtod(params.dataRepresentation, NULL));
} }
static bool doubleRefTest(const char* refResult, const char* analysisBuffer)
{
double test = strtod(analysisBuffer, NULL);
double expected = strtod(refResult, NULL);
return test == expected;
}
static void octalRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize) static void octalRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
{ {
const unsigned long int data = strtoul(params.dataRepresentation, NULL, 10); const unsigned long int data = strtoul(params.dataRepresentation, NULL, 10);

View File

@@ -18,7 +18,7 @@
const char *kernelCode = "__kernel void kernel_empty(){}"; const char *kernelCode = "__kernel void kernel_empty(){}";
REGISTER_TEST(profiling_timebase) REGISTER_TEST_VERSION(profiling_timebase, Version(2, 1))
{ {
Version version = get_device_cl_version(device); Version version = get_device_cl_version(device);
cl_platform_id platform = getPlatformFromDevice(device); cl_platform_id platform = getPlatformFromDevice(device);

View File

@@ -341,9 +341,9 @@ int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState,
{ {
if ((targetArr[i]) % minAlign != (cl_uint)0) if ((targetArr[i]) % minAlign != (cl_uint)0)
{ {
vlog_error( vlog_error("Error in work-item %zu (of %zu). Expected a multiple "
"Error %zu (of %zu). Expected a multiple of %zx, got %x\n", i, "of 0x%zx, got 0x%x\n",
pClState->m_numThreads, minAlign, targetArr[i]); i, pClState->m_numThreads, minAlign, targetArr[i]);
return -1; return -1;
} }
} }
@@ -371,7 +371,8 @@ int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState,
{ {
if (targetArr[i] != targetSize) if (targetArr[i] != targetSize)
{ {
vlog_error("Error %zu (of %zu). Expected %d, got %d\n", i, vlog_error(
"Error in work-item %zu (of %zu). Expected %d, got %d\n", i,
pClState->m_numThreads, targetSize, targetArr[i]); pClState->m_numThreads, targetSize, targetArr[i]);
return -1; return -1;
} }
@@ -390,10 +391,11 @@ int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState,
{ {
if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0) if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0)
{ {
vlog_error( vlog_error("Error in work-item %zu (of %zu). Expected %zu more "
"Error %zu (of %zu). Expected %zu more than a multiple of " "than a multiple of "
"%zu, got %d \n", "%zu, got %d \n",
i, pClState->m_numThreads, beforeSize, totSize, targetArr[i]); i, pClState->m_numThreads, beforeSize, totSize,
targetArr[i]);
return -1; return -1;
} }
} }

View File

@@ -3,7 +3,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable #extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable #extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#define MAX_BUFFERS 5 layout(constant_id = 0) const uint MAX_BUFFERS = 5;
layout(binding = 0) buffer Params layout(binding = 0) buffer Params
{ {

View File

@@ -26,6 +26,7 @@
#endif #endif
#include <assert.h> #include <assert.h>
#include <memory>
#include <vector> #include <vector>
#include <iostream> #include <iostream>
#include <string.h> #include <string.h>
@@ -79,9 +80,9 @@ struct ConsistencyExternalBufferTest : public VulkanTestBase
VulkanBufferList vkBufferList(1, *vkDevice, bufferSize, VulkanBufferList vkBufferList(1, *vkDevice, bufferSize,
vkExternalMemoryHandleType); vkExternalMemoryHandleType);
VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory( std::unique_ptr<VulkanDeviceMemory> vkDeviceMem(new VulkanDeviceMemory(
*vkDevice, vkBufferList[0], memoryTypeList[0], *vkDevice, vkBufferList[0], memoryTypeList[0],
vkExternalMemoryHandleType); vkExternalMemoryHandleType));
vkDeviceMem->bindBuffer(vkBufferList[0], 0); vkDeviceMem->bindBuffer(vkBufferList[0], 0);

View File

@@ -128,12 +128,24 @@ int run_test_with_two_queue(
vkDescriptorSetLayoutBindingList.addBinding( vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding( vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
VulkanDescriptorSetLayout vkDescriptorSetLayout( VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList); vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule); VkSpecializationMapEntry entry;
entry.constantID = 0;
entry.offset = 0;
entry.size = sizeof(uint32_t);
VkSpecializationInfo spec;
spec.mapEntryCount = 1;
spec.pMapEntries = &entry;
spec.dataSize = sizeof(uint32_t);
spec.pData = &numBuffers;
VulkanComputePipeline vkComputePipeline(
vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
VulkanDescriptorPool vkDescriptorPool(vkDevice, VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList); vkDescriptorSetLayoutBindingList);
@@ -461,12 +473,24 @@ int run_test_with_one_queue(
vkDescriptorSetLayoutBindingList.addBinding( vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding( vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
VulkanDescriptorSetLayout vkDescriptorSetLayout( VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList); vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule); VkSpecializationMapEntry entry;
entry.constantID = 0;
entry.offset = 0;
entry.size = sizeof(uint32_t);
VkSpecializationInfo spec;
spec.mapEntryCount = 1;
spec.pMapEntries = &entry;
spec.dataSize = sizeof(uint32_t);
spec.pData = &numBuffers;
VulkanComputePipeline vkComputePipeline(
vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
VulkanDescriptorPool vkDescriptorPool(vkDevice, VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList); vkDescriptorSetLayoutBindingList);
@@ -764,12 +788,24 @@ int run_test_with_multi_import_same_ctx(
vkDescriptorSetLayoutBindingList.addBinding( vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding( vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
VulkanDescriptorSetLayout vkDescriptorSetLayout( VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList); vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule); VkSpecializationMapEntry entry;
entry.constantID = 0;
entry.offset = 0;
entry.size = sizeof(uint32_t);
VkSpecializationInfo spec;
spec.mapEntryCount = 1;
spec.pMapEntries = &entry;
spec.dataSize = sizeof(uint32_t);
spec.pData = &numBuffers;
VulkanComputePipeline vkComputePipeline(
vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
VulkanDescriptorPool vkDescriptorPool(vkDevice, VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList); vkDescriptorSetLayoutBindingList);
@@ -1103,12 +1139,24 @@ int run_test_with_multi_import_diff_ctx(
vkDescriptorSetLayoutBindingList.addBinding( vkDescriptorSetLayoutBindingList.addBinding(
0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
vkDescriptorSetLayoutBindingList.addBinding( vkDescriptorSetLayoutBindingList.addBinding(
1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
VulkanDescriptorSetLayout vkDescriptorSetLayout( VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList); vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkBufferShaderModule); VkSpecializationMapEntry entry;
entry.constantID = 0;
entry.offset = 0;
entry.size = sizeof(uint32_t);
VkSpecializationInfo spec;
spec.mapEntryCount = 1;
spec.pMapEntries = &entry;
spec.dataSize = sizeof(uint32_t);
spec.pData = &numBuffers;
VulkanComputePipeline vkComputePipeline(
vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
VulkanDescriptorPool vkDescriptorPool(vkDevice, VulkanDescriptorPool vkDescriptorPool(vkDevice,
vkDescriptorSetLayoutBindingList); vkDescriptorSetLayoutBindingList);
@@ -1586,7 +1634,7 @@ struct BufferTestBase : public VulkanTestBase
{ {
BufferTestBase(cl_device_id device, cl_context context, BufferTestBase(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int nelems) cl_command_queue queue, cl_int nelems)
: VulkanTestBase(device, context, queue, nelems) : VulkanTestBase(device, context, queue, nelems, true)
{} {}
int test_buffer_common(bool use_fence) int test_buffer_common(bool use_fence)

View File

@@ -37,11 +37,13 @@ inline void params_reset()
struct VulkanTestBase struct VulkanTestBase
{ {
VulkanTestBase(cl_device_id device, cl_context context, VulkanTestBase(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int nelems) cl_command_queue queue, cl_int nelems,
bool useShaderInt8 = false)
: device(device), context(context), num_elems(nelems) : device(device), context(context), num_elems(nelems)
{ {
vkDevice.reset(new VulkanDevice( vkDevice.reset(new VulkanDevice(
getAssociatedVulkanPhysicalDevice(device, useValidationLayers))); getAssociatedVulkanPhysicalDevice(device, useValidationLayers),
getDefaultVulkanQueueFamilyToQueueCountMap(), useShaderInt8));
cl_platform_id platform; cl_platform_id platform;
cl_int error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, cl_int error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,