diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 8f2a0872..ec36d73c 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -28,7 +28,7 @@ jobs: arch: android-aarch64 android_arch_abi: arm64-v8a steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Ninja uses: seanmiddleditch/gha-setup-ninja@master - name: Install Arm and AArch64 compilers @@ -184,7 +184,7 @@ jobs: steps: - name: Install packages run: sudo apt install -y clang-format clang-format-14 - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Check code format diff --git a/CMakeLists.txt b/CMakeLists.txt index 30a64447..09fa4854 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,14 +81,17 @@ include(CheckFunctionExists) include(CheckIncludeFiles) include(CheckCXXCompilerFlag) -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)") - set(CLConform_TARGET_ARCH ARM) -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" OR + CMAKE_VS_PLATFORM_NAME MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)") set(CLConform_TARGET_ARCH ARM64) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)") + set(CLConform_TARGET_ARCH ARM) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") set(CLConform_TARGET_ARCH x86_64) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*") set(CLConform_TARGET_ARCH x86) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv.*") + set(CLConform_TARGET_ARCH RISCV) endif() if(NOT DEFINED CLConform_TARGET_ARCH) @@ -139,8 +142,12 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang" add_cxx_flag_if_supported(-frounding-math) endif() else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__") + # Curb the inclusion of SSE headers when compiling for non x86 targets + if(${CLConform_TARGET_ARCH} STREQUAL "x86_64" OR ${CLConform_TARGET_ARCH} + STREQUAL "x86") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__") + endif() endif() # Set a module's COMPILE_FLAGS if using gcc or clang. diff --git a/test_common/harness/deviceInfo.cpp b/test_common/harness/deviceInfo.cpp index 5b1e7e03..e06d32aa 100644 --- a/test_common/harness/deviceInfo.cpp +++ b/test_common/harness/deviceInfo.cpp @@ -50,7 +50,7 @@ std::string get_device_info_string(cl_device_id device, } /* Determines if an extension is supported by a device. */ -int is_extension_available(cl_device_id device, const char *extensionName) +bool is_extension_available(cl_device_id device, const char *extensionName) { std::string extString = get_device_extensions_string(device); std::istringstream ss(extString); diff --git a/test_common/harness/deviceInfo.h b/test_common/harness/deviceInfo.h index c3c8a512..30711b18 100644 --- a/test_common/harness/deviceInfo.h +++ b/test_common/harness/deviceInfo.h @@ -26,7 +26,7 @@ std::string get_device_info_string(cl_device_id device, cl_device_info param_name); /* Determines if an extension is supported by a device. */ -int is_extension_available(cl_device_id device, const char *extensionName); +bool is_extension_available(cl_device_id device, const char *extensionName); /* Returns the version of the extension the device supports or throws an * exception if the extension is not supported by the device. */ diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h index cb1a9113..7e59f915 100644 --- a/test_common/harness/errorHelpers.h +++ b/test_common/harness/errorHelpers.h @@ -120,6 +120,12 @@ static int vlog_win32(const char *format, ...); return retValue; \ } \ } +#define test_object_failure_ret(object, errCode, expectedErrCode, msg, \ + retValue) \ + { \ + test_assert_error_ret(object == nullptr, msg, retValue); \ + test_failure_error_ret(errCode, expectedErrCode, msg, retValue); \ + } #define print_failure_error(errCode, expectedErrCode, msg) \ log_error("ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, \ IGetErrorString(errCode), IGetErrorString(expectedErrCode), \ diff --git a/test_common/harness/extensionHelpers.h b/test_common/harness/extensionHelpers.h index e98f67c2..abaa0ba6 100644 --- a/test_common/harness/extensionHelpers.h +++ b/test_common/harness/extensionHelpers.h @@ -32,5 +32,15 @@ } \ } while (false) +#define GET_FUNCTION_EXTENSION_ADDRESS(device, FUNC) \ + FUNC = \ + reinterpret_cast(clGetExtensionFunctionAddressForPlatform( \ + getPlatformFromDevice(device), #FUNC)); \ + if (FUNC == nullptr) \ + { \ + log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ + " with " #FUNC "\n"); \ + return TEST_FAIL; \ + } #endif // _extensionHelpers_h diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index afb0f5a3..9c1e0db0 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -45,6 +45,9 @@ typedef int64_t FPU_mode_type; #elif defined(__PPC__) #include extern __thread fpu_control_t fpu_control; +#elif defined(__riscv) +#define _FPU_MASK_NI 1 +static FPU_mode_type fpu_control; #elif defined(__mips__) #include "mips/m32c1.h" #endif @@ -56,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *oldMode) || defined(_M_X64) || defined(__MINGW32__) *oldMode = _mm_getcsr(); _mm_setcsr(*oldMode | 0x8040); -#elif defined(__PPC__) +#elif defined(__PPC__) || defined(__riscv) *oldMode = fpu_control; fpu_control |= _FPU_MASK_NI; #elif defined(__arm__) @@ -89,8 +92,8 @@ inline void DisableFTZ(FPU_mode_type *oldMode) || defined(_M_X64) || defined(__MINGW32__) *oldMode = _mm_getcsr(); _mm_setcsr(*oldMode & ~0x8040); -#elif defined(__PPC__) - *mode = fpu_control; +#elif defined(__PPC__) || defined(__riscv) + *oldMode = fpu_control; fpu_control &= ~_FPU_MASK_NI; #elif defined(__arm__) unsigned fpscr; @@ -121,7 +124,7 @@ inline void RestoreFPState(FPU_mode_type *mode) #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ || defined(_M_X64) || defined(__MINGW32__) _mm_setcsr(*mode); -#elif defined(__PPC__) +#elif defined(__PPC__) || defined(__riscv) fpu_control = *mode; #elif defined(__arm__) __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode)); diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index 8fa9c0a4..637bf640 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -141,7 +141,6 @@ std::string get_kernel_name(const std::string &source) { kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1); kernelsList[kernelsList.size() - 1] = '.'; - kernelsList[kernelsList.size() - 1] = '.'; } oss << kernelsList; } @@ -678,17 +677,18 @@ static int create_single_kernel_helper_create_program_offline( return CL_SUCCESS; } -static int create_single_kernel_helper_create_program( - cl_context context, cl_device_id device, cl_program *outProgram, - unsigned int numKernelLines, const char **kernelProgram, - const char *buildOptions, CompilationMode compilationMode) +int create_single_kernel_helper_create_program(cl_context context, + cl_program *outProgram, + unsigned int numKernelLines, + const char **kernelProgram, + const char *buildOptions) { std::lock_guard compiler_lock(gCompilerMutex); std::string filePrefix = get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions); bool shouldSaveToDisk = should_save_kernel_source_to_disk( - compilationMode, gCompilationCacheMode, gCompilationCachePath, + gCompilationMode, gCompilationCacheMode, gCompilationCachePath, filePrefix); if (shouldSaveToDisk) @@ -701,7 +701,7 @@ static int create_single_kernel_helper_create_program( return -1; } } - if (compilationMode == kOnline) + if (gCompilationMode == kOnline) { int error = CL_SUCCESS; @@ -718,42 +718,11 @@ static int create_single_kernel_helper_create_program( else { return create_single_kernel_helper_create_program_offline( - context, device, outProgram, numKernelLines, kernelProgram, - buildOptions, compilationMode); + context, nullptr, outProgram, numKernelLines, kernelProgram, + buildOptions, gCompilationMode); } } -int create_single_kernel_helper_create_program(cl_context context, - cl_program *outProgram, - unsigned int numKernelLines, - const char **kernelProgram, - const char *buildOptions) -{ - return create_single_kernel_helper_create_program( - context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions, - gCompilationMode); -} - -int create_single_kernel_helper_create_program_for_device( - cl_context context, cl_device_id device, cl_program *outProgram, - unsigned int numKernelLines, const char **kernelProgram, - const char *buildOptions) -{ - return create_single_kernel_helper_create_program( - context, device, outProgram, numKernelLines, kernelProgram, - buildOptions, gCompilationMode); -} - -int create_single_kernel_helper_with_build_options( - cl_context context, cl_program *outProgram, cl_kernel *outKernel, - unsigned int numKernelLines, const char **kernelProgram, - const char *kernelName, const char *buildOptions) -{ - return create_single_kernel_helper(context, outProgram, outKernel, - numKernelLines, kernelProgram, - kernelName, buildOptions); -} - // Creates and builds OpenCL C/C++ program, and creates a kernel int create_single_kernel_helper(cl_context context, cl_program *outProgram, cl_kernel *outKernel, @@ -1239,8 +1208,8 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags, { cl_image_format *list; cl_uint count = 0; - cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128, - NULL, &count); + cl_int err = + clGetSupportedImageFormats(context, flags, image_type, 0, NULL, &count); if (count == 0) return 0; list = (cl_image_format *)malloc(count * sizeof(cl_image_format)); @@ -1276,7 +1245,6 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags, return (i < count) ? 1 : 0; } -size_t get_pixel_bytes(const cl_image_format *fmt); size_t get_pixel_bytes(const cl_image_format *fmt) { size_t chanCount; diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp index 65167116..97aedb7c 100644 --- a/test_common/harness/parseParameters.cpp +++ b/test_common/harness/parseParameters.cpp @@ -58,6 +58,13 @@ void helpInfo() with a very small subset of the tests. This option should not be used for conformance submission (default: disabled). + --invalid-object-scenarios=,.... + Specify different scenarios to use when + testing for object validity. Options can be: + nullptr To use a nullptr (default) + valid_object_wrong_type To use a valid_object which is not the correct type + NOTE: valid_object_wrong_type option is not required for OpenCL conformance. + For offline compilation (binary and spir-v modes) only: --compilation-cache-mode Specify a compilation caching mode: @@ -104,6 +111,7 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore) } delArg = 0; + size_t i_object_length = strlen("--invalid-object-scenarios="); if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { @@ -264,6 +272,32 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore) return -1; } } + else if (!strncmp(argv[i], + "--invalid-object-scenarios=", i_object_length)) + { + if (strlen(argv[i]) > i_object_length) + { + delArg++; + gInvalidObject = 0; + std::string invalid_objects(argv[i]); + + if (invalid_objects.find("nullptr") != std::string::npos) + { + gInvalidObject |= InvalidObject::Nullptr; + } + if (invalid_objects.find("valid_object_wrong_type") + != std::string::npos) + { + gInvalidObject |= InvalidObject::ValidObjectWrongType; + } + } + else + { + log_error("Program argument for --invalid-object-scenarios was " + "not specified.\n"); + return -1; + } + } // cleaning parameters from argv tab for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg]; diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp index 5aeb86f1..31b18f75 100644 --- a/test_common/harness/rounding_mode.cpp +++ b/test_common/harness/rounding_mode.cpp @@ -201,6 +201,7 @@ RoundingMode get_round(void) #elif defined(__mips__) #include "mips/m32c1.h" #endif + void *FlushToZero(void) { #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) @@ -231,6 +232,8 @@ void *FlushToZero(void) #elif defined(__mips__) fpa_bissr(FPA_CSR_FS); return NULL; +#elif defined(__riscv) + return NULL; #else #error Unknown arch #endif @@ -266,6 +269,8 @@ void UnFlushToZero(void *p) _FPU_SETCW(flags); #elif defined(__mips__) fpa_bicsr(FPA_CSR_FS); +#elif defined(__riscv) + return; #else #error Unknown arch #endif diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index c745a639..92729c49 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "testHarness.h" +#include "stringHelpers.h" #include "compat.h" #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +35,8 @@ #include "imageHelpers.h" #include "parseParameters.h" +namespace fs = std::filesystem; + #if !defined(_WIN32) #include #include @@ -60,6 +64,7 @@ int gInfNanSupport = 1; int gIsEmbedded = 0; int gHasLong = 1; bool gCoreILProgram = true; +int gInvalidObject = InvalidObject::Nullptr; #define DEFAULT_NUM_ELEMENTS 0x4000 @@ -94,11 +99,25 @@ static int saveResultsToJson(const char *suiteName, test_definition testList[], return EXIT_SUCCESS; } - FILE *file = fopen(fileName, "w"); + fs::path file_path(fileName); + + // When running under Bazel test, prepend the Bazel output directory to + // the provided path + if (nullptr != getenv("BAZEL_TEST")) + { + char *bazel_output_dir = getenv("TEST_UNDECLARED_OUTPUTS_DIR"); + if (nullptr != bazel_output_dir) + { + file_path = fs::path(bazel_output_dir) / file_path; + } + } + + auto file_path_str = to_string(file_path.u8string()); + FILE *file = fopen(file_path_str.c_str(), "w"); if (NULL == file) { log_error("ERROR: Failed to open '%s' for writing results.\n", - fileName); + file_path_str.c_str()); return EXIT_FAILURE; } @@ -127,7 +146,8 @@ static int saveResultsToJson(const char *suiteName, test_definition testList[], int ret = fclose(file) ? EXIT_FAILURE : EXIT_SUCCESS; - log_info("Saving results to %s: %s!\n", fileName, save_map[ret]); + log_info("Saving results to %s: %s!\n", file_path_str.c_str(), + save_map[ret]); return ret; } @@ -308,6 +328,8 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, "CL_CONFORMANCE_RESULTS_FILENAME (currently '%s')\n", fileName != NULL ? fileName : ""); log_info("\t to save results to JSON file.\n"); + log_info("\t When running in Bazel test this is relative to " + "$TEST_UNDECLARED_OUTPUTS_DIR.\n"); log_info("\n"); log_info("Test names:\n"); @@ -1408,6 +1430,8 @@ void PrintArch(void) vlog("ARCH:\tWindows\n"); #elif defined(__mips__) vlog("ARCH:\tmips\n"); +#elif defined(__riscv) + vlog("ARCH:\tRISC-V\n"); #else #error unknown arch #endif diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h index cc9d8212..76fda76f 100644 --- a/test_common/harness/testHarness.h +++ b/test_common/harness/testHarness.h @@ -22,6 +22,7 @@ #include #include +#include class Version { public: @@ -257,6 +258,37 @@ extern std::string get_platform_info_string(cl_platform_id platform, cl_platform_info param_name); extern bool is_platform_extension_available(cl_platform_id platform, const char *extensionName); +enum InvalidObject +{ + Nullptr = 1 << 0, + ValidObjectWrongType = 1 << 1, +}; + +extern int gInvalidObject; + + +template std::vector get_invalid_objects(cl_device_id device) +{ + std::vector ret; + if ((gInvalidObject & InvalidObject::Nullptr) + && !(std::is_same::value)) + { + ret.push_back(nullptr); + } + if (gInvalidObject & InvalidObject::ValidObjectWrongType) + { + if (std::is_same::value) + { + cl_platform_id platform = getPlatformFromDevice(device); + ret.push_back(reinterpret_cast(platform)); + } + else + { + ret.push_back(reinterpret_cast(device)); + } + } + return ret; +} #if !defined(__APPLE__) void memset_pattern4(void *, const void *, size_t); diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt index b781e49b..fed0aace 100644 --- a/test_conformance/api/CMakeLists.txt +++ b/test_conformance/api/CMakeLists.txt @@ -6,7 +6,10 @@ set(${MODULE_NAME}_SOURCES main.cpp negative_platform.cpp negative_queue.cpp + negative_context.cpp + negative_enqueue_marker.cpp negative_enqueue_map_image.cpp + negative_device.cpp test_api_consistency.cpp test_bool.cpp test_retain.cpp diff --git a/test_conformance/api/negative_context.cpp b/test_conformance/api/negative_context.cpp new file mode 100644 index 00000000..23ace048 --- /dev/null +++ b/test_conformance/api/negative_context.cpp @@ -0,0 +1,290 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +/* Negative Tests for clCreateContext */ +REGISTER_TEST(negative_create_context) +{ + cl_context_properties props[3] = { + CL_CONTEXT_PLATFORM, reinterpret_cast(nullptr), 0 + }; + cl_int err = 0; + cl_context ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_PLATFORM, + "clCreateContext should return CL_INVALID_PLATFORM when:\"an invalid " + "platform object is used with the CL_CONTEXT_PLATFORM property\" using " + "a nullptr", + TEST_FAIL); + + props[0] = reinterpret_cast("INVALID_PROPERTY"); + + props[1] = reinterpret_cast(nullptr); + ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_PROPERTY, + "clCreateContext should return CL_INVALID_PROPERTY when: \"context " + "property name in properties is not a supported property name\"", + TEST_FAIL); + + if (get_device_cl_version(device) >= Version(1, 2)) + { + cl_context_properties invalid_value{ -1 }; + props[0] = CL_CONTEXT_INTEROP_USER_SYNC; + props[1] = invalid_value; + ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_PROPERTY, + "clCreateContext should return CL_INVALID_PROPERTY when: \"the " + "value specified for a supported property name is not valid\"", + TEST_FAIL); + + cl_bool property_value = CL_FALSE; + cl_context_properties duplicated_property[5] = { + CL_CONTEXT_INTEROP_USER_SYNC, + static_cast(property_value), + CL_CONTEXT_INTEROP_USER_SYNC, + static_cast(property_value), 0 + }; + ctx = clCreateContext(duplicated_property, 1, &device, nullptr, nullptr, + &err); + test_object_failure_ret( + ctx, err, CL_INVALID_PROPERTY, + "clCreateContext should return CL_INVALID_PROPERTY when: \"the " + "same property name is specified more than once\"", + TEST_FAIL); + } + + ctx = clCreateContext(nullptr, 1, nullptr, nullptr, nullptr, &err); + test_object_failure_ret(ctx, err, CL_INVALID_VALUE, + "clCreateContext should return CL_INVALID_VALUE " + "when: \"devices is NULL\"", + TEST_FAIL); + + ctx = clCreateContext(nullptr, 0, &device, nullptr, nullptr, &err); + test_object_failure_ret(ctx, err, CL_INVALID_VALUE, + "clCreateContext should return CL_INVALID_VALUE " + "when: \"num_devices is equal to zero\"", + TEST_FAIL); + + int user_data = 1; // Arbitrary non-NULL value + ctx = clCreateContext(nullptr, 1, &device, nullptr, &user_data, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_VALUE, + "clCreateContext should return CL_INVALID_VALUE when: \"pfn_notify is " + "NULL but user_data is not NULL\"", + TEST_FAIL); + + cl_device_id invalid_device = nullptr; + ctx = clCreateContext(nullptr, 1, &invalid_device, nullptr, nullptr, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_DEVICE, + "clCreateContext should return CL_INVALID_DEVICE when: \"any device in " + "devices is not a valid device\" using a device set to nullptr", + TEST_FAIL); + + return TEST_PASS; +} + +/* Negative Tests for clCreateContextFromType */ +REGISTER_TEST(negative_create_context_from_type) +{ + cl_platform_id platform = getPlatformFromDevice(device); + + cl_context_properties props[5] = { + CL_CONTEXT_PLATFORM, reinterpret_cast(nullptr), + 0, 0, 0 + }; + cl_int err = 0; + cl_context ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, + nullptr, nullptr, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_PLATFORM, + "clCreateContextFromType should return CL_INVALID_PLATFORM when: \"an " + "invalid platform object is used with the CL_CONTEXT_PLATFORM " + "property\" using a nullptr", + TEST_FAIL); + + ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr, + nullptr, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_PLATFORM, + "clCreateContextFromType should return CL_INVALID_PLATFORM when: \"an " + "invalid platform object is used with the CL_CONTEXT_PLATFORM " + "property\" using a valid object that is NOT a platform", + TEST_FAIL); + + props[1] = reinterpret_cast(platform); + props[2] = reinterpret_cast("INVALID_PROPERTY"); + props[3] = reinterpret_cast(nullptr); + + ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr, + nullptr, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_PROPERTY, + "clCreateContextFromType should return CL_INVALID_PROPERTY when: " + "\"context property name in properties is not a supported property " + "name\"", + TEST_FAIL); + + if (get_device_cl_version(device) >= Version(1, 2)) + { + cl_context_properties invalid_value{ -1 }; + props[2] = CL_CONTEXT_INTEROP_USER_SYNC; + props[3] = invalid_value; + ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr, + nullptr, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_PROPERTY, + "clCreateContextFromType should return CL_INVALID_PROPERTY when: " + "\"the value specified for a supported property name is not " + "valid\"", + TEST_FAIL); + + props[2] = CL_CONTEXT_PLATFORM; + props[3] = reinterpret_cast(platform); + ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr, + nullptr, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_PROPERTY, + "clCreateContextFromType should return CL_INVALID_PROPERTY when: " + "\"the same property name is specified more than once\"", + TEST_FAIL); + } + + int user_data = 1; // Arbitrary non-NULL value + ctx = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_DEFAULT, nullptr, + &user_data, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_VALUE, + "clCreateContextFromType should return CL_INVALID_VALUE when: " + "\"pfn_notify is NULL but user_data is not NULL\"", + TEST_FAIL); + + cl_device_type INVALID_DEVICE_TYPE = 0; + ctx = clCreateContextFromType(nullptr, INVALID_DEVICE_TYPE, nullptr, + nullptr, &err); + test_object_failure_ret( + ctx, err, CL_INVALID_DEVICE_TYPE, + "clCreateContextFromType should return CL_INVALID_DEVICE_TYPE when: " + "\"device_type is not a valid value\"", + TEST_FAIL); + + std::vector device_types = { CL_DEVICE_TYPE_CPU, + CL_DEVICE_TYPE_GPU, + CL_DEVICE_TYPE_ACCELERATOR }; + if (get_device_cl_version(device) >= Version(1, 2)) + { + device_types.push_back(CL_DEVICE_TYPE_CUSTOM); + } + for (auto type : device_types) + { + clContextWrapper tmp_context = + clCreateContextFromType(nullptr, type, nullptr, nullptr, &err); + if (err != CL_SUCCESS) + { + test_object_failure_ret( + tmp_context, err, CL_DEVICE_NOT_FOUND, + "clCreateContextFromType should return CL_DEVICE_NOT_FOUND " + "when: \"no devices that match device_type and property values " + "specified in properties are currently available\"", + TEST_FAIL); + break; + } + } + + return TEST_PASS; +} + +/* Negative Tests for clRetainContext */ +REGISTER_TEST(negative_retain_context) +{ + cl_int err = clRetainContext(nullptr); + test_failure_error_ret( + err, CL_INVALID_CONTEXT, + "clRetainContext should return CL_INVALID_CONTEXT when: \"context is " + "not a valid OpenCL context\" using a nullptr", + TEST_FAIL); + + return TEST_PASS; +} + +/* Negative Tests for clReleaseContext */ +REGISTER_TEST(negative_release_context) +{ + cl_int err = clReleaseContext(nullptr); + test_failure_error_ret( + err, CL_INVALID_CONTEXT, + "clReleaseContext should return CL_INVALID_CONTEXT when: \"context is " + "not a valid OpenCL context\" using a nullptr", + TEST_FAIL); + + return TEST_PASS; +} + +/* Negative Tests for clGetContextInfo */ +REGISTER_TEST(negative_get_context_info) +{ + + cl_uint param_value = 0; + cl_int err = clGetContextInfo(nullptr, CL_CONTEXT_REFERENCE_COUNT, + sizeof(param_value), ¶m_value, nullptr); + test_failure_error_ret( + err, CL_INVALID_CONTEXT, + "clGetContextInfo should return CL_INVALID_CONTEXT when: \"context is " + "not a valid context\" using a nullptr", + TEST_FAIL); + + cl_context_info INVALID_PARAM_VALUE = 0; + err = clGetContextInfo(context, INVALID_PARAM_VALUE, 0, nullptr, nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clGetContextInfo should return CL_INVALID_VALUE when: \"param_name is " + "not one of the supported values\"", + TEST_FAIL); + + err = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, + sizeof(param_value) - 1, ¶m_value, nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clGetContextInfo should return CL_INVALID_VALUE when: \"size in bytes " + "specified by param_value_size is < size of return type and " + "param_value is not a NULL value\"", + TEST_FAIL); + + return TEST_PASS; +} + +/* Negative Tests for clSetContextDestructorCallback */ +static void CL_CALLBACK callback(cl_context context, void* user_data) {} + +REGISTER_TEST_VERSION(negative_set_context_destructor_callback, Version(3, 0)) +{ + cl_int err = clSetContextDestructorCallback(nullptr, callback, nullptr); + test_failure_error_ret( + err, CL_INVALID_CONTEXT, + "clSetContextDestructorCallback should return CL_INVALID_CONTEXT when: " + "\"context is not a valid context\" using a nullptr", + TEST_FAIL); + + err = clSetContextDestructorCallback(context, nullptr, nullptr); + test_failure_error_ret(err, CL_INVALID_VALUE, + "clSetContextDestructorCallback should return " + "CL_INVALID_VALUE when: \"pfn_notify is NULL\"", + TEST_FAIL); + + return TEST_PASS; +} diff --git a/test_conformance/api/negative_device.cpp b/test_conformance/api/negative_device.cpp new file mode 100644 index 00000000..d51c685c --- /dev/null +++ b/test_conformance/api/negative_device.cpp @@ -0,0 +1,526 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "testBase.h" +#include "harness/testHarness.h" +#include + +/* Negative Tests for clGetDeviceInfo */ +REGISTER_TEST(negative_get_device_info) +{ + + cl_device_type device_type = 0; + cl_int err(CL_SUCCESS); + for (auto invalid_device : get_invalid_objects(device)) + { + err = clGetDeviceInfo(invalid_device, CL_DEVICE_TYPE, + sizeof(device_type), &device_type, nullptr); + test_failure_error_ret(err, CL_INVALID_DEVICE, + "clGetDeviceInfo should return " + "CL_INVALID_DEVICE when: \"device is not " + "a valid device\"", + TEST_FAIL); + } + + constexpr cl_device_info INVALID_PARAM_VALUE = 0; + err = clGetDeviceInfo(device, INVALID_PARAM_VALUE, 0, nullptr, nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clGetDeviceInfo should return CL_INVALID_VALUE when: \"param_name is " + "not one of the supported values\"", + TEST_FAIL); + + err = clGetDeviceInfo(device, CL_DEVICE_TYPE, 0, &device_type, nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clGetDeviceInfo should return CL_INVALID_VALUE when: \"size in bytes " + "specified by param_value_size is < size of return type and " + "param_value is not a NULL value\"", + TEST_FAIL); + + return TEST_PASS; +} + +/* Negative Tests for clGetDeviceIDs */ +REGISTER_TEST(negative_get_device_ids) +{ + cl_platform_id platform = getPlatformFromDevice(device); + + cl_device_id devices = nullptr; + + cl_int err(CL_SUCCESS); + for (auto invalid_platform : get_invalid_objects(device)) + { + err = clGetDeviceIDs(invalid_platform, CL_DEVICE_TYPE_DEFAULT, 1, + &devices, nullptr); + test_failure_error_ret(err, CL_INVALID_PLATFORM, + "clGetDeviceIDs should return " + "CL_INVALID_PLATFORM when: \"platform is " + "not a valid platform\"", + TEST_FAIL); + } + + cl_device_type INVALID_DEVICE_TYPE = 0; + err = clGetDeviceIDs(platform, INVALID_DEVICE_TYPE, 1, &devices, nullptr); + test_failure_error_ret( + err, CL_INVALID_DEVICE_TYPE, + "clGetDeviceIDs should return CL_INVALID_DEVICE_TYPE when: " + "\"device_type is not a valid value\"", + TEST_FAIL); + + err = + clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 0, &devices, nullptr); + test_failure_error_ret(err, CL_INVALID_VALUE, + "clGetDeviceIDs should return when: \"num_entries " + "is equal to zero and devices is not NULL\"", + TEST_FAIL); + + err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, nullptr, nullptr); + test_failure_error_ret(err, CL_INVALID_VALUE, + "clGetDeviceIDs should return CL_INVALID_VALUE " + "when: \"both num_devices and devices are NULL\"", + TEST_FAIL); + + devices = nullptr; + std::vector device_types{ CL_DEVICE_TYPE_CPU, + CL_DEVICE_TYPE_GPU, + CL_DEVICE_TYPE_ACCELERATOR }; + if (get_device_cl_version(device) >= Version(1, 2)) + { + device_types.push_back(CL_DEVICE_TYPE_CUSTOM); + } + + bool platform_supports_all_device_types = true; + for (auto device_type : device_types) + { + err = clGetDeviceIDs(platform, device_type, 1, &devices, nullptr); + if (err == CL_SUCCESS) + { + continue; + } + platform_supports_all_device_types = false; + break; + } + if (platform_supports_all_device_types) + { + log_info("Platform has every Device Type... Skipping Test\n"); + } + else + { + test_failure_error_ret( + err, CL_DEVICE_NOT_FOUND, + "clGetDeviceIDs should return CL_DEVICE_NOT_FOUND when: \"no " + "OpenCL devices that matched device_type were found\"", + TEST_FAIL); + } + + return TEST_PASS; +} + +/* Negative Tests for clGetDeviceAndHostTimer */ +REGISTER_TEST_VERSION(negative_get_device_and_host_timer, Version(2, 1)) +{ + cl_ulong *device_timestamp = nullptr, *host_timestamp = nullptr; + cl_int err = CL_SUCCESS; + for (auto invalid_device : get_invalid_objects(device)) + { + err = clGetDeviceAndHostTimer(invalid_device, device_timestamp, + host_timestamp); + test_failure_error_ret( + err, CL_INVALID_DEVICE, + "clGetDeviceAndHostTimer should return CL_INVALID_DEVICE when: " + "\"device is not a valid device\"", + TEST_FAIL); + } + + cl_platform_id platform = getPlatformFromDevice(device); + + // Initialise timer_resolution to a Non-0 value as CL2.1/2 devices must + // support timer synchronisation + cl_ulong timer_resolution = 1; + auto device_version = get_device_cl_version(device); + err = + clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION, + sizeof(timer_resolution), &timer_resolution, nullptr); + test_error(err, "clGetPlatformInfo failed"); + if (timer_resolution == 0 + && (device_version == Version(2, 1) || device_version == Version(2, 2))) + { + log_error("Support for device and host timer synchronization is " + "required for platforms supporting OpenCL 2.1 or 2.2."); + return TEST_FAIL; + } + + if (timer_resolution != 0) + { + log_info("Platform Supports Timers\n"); + log_info("Skipping CL_INVALID_OPERATION tests\n"); + + err = clGetDeviceAndHostTimer(device, nullptr, host_timestamp); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clGetDeviceAndHostTimer should return CL_INVALID_VALUE when: " + "\"host_timestamp or device_timestamp is NULL\" using nullptr for " + "device_timestamp ", + TEST_FAIL); + + err = clGetDeviceAndHostTimer(device, device_timestamp, nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clGetDeviceAndHostTimer should return CL_INVALID_VALUE when: " + "\"host_timestamp or device_timestamp is NULL\" using nullptr for " + "host_timestamp ", + TEST_FAIL); + } + else + { + log_info("Platform does not Support Timers\n"); + log_info("Skipping CL_INVALID_VALUE tests\n"); + + err = clGetDeviceAndHostTimer(device, device_timestamp, host_timestamp); + test_failure_error_ret( + err, CL_INVALID_OPERATION, + "clGetDeviceAndHostTimer should return CL_INVALID_OPERATION when: " + "\"the platform associated with device does not support device and " + "host timer synchronization\"", + TEST_FAIL); + } + + return TEST_PASS; +} + +/* Negative Tests for clGetHostTimer */ +REGISTER_TEST_VERSION(negative_get_host_timer, Version(2, 1)) +{ + cl_ulong host_timestamp = 0; + cl_int err = CL_SUCCESS; + for (auto invalid_device : get_invalid_objects(device)) + { + err = clGetHostTimer(invalid_device, &host_timestamp); + test_failure_error_ret(err, CL_INVALID_DEVICE, + "clGetHostTimer should return CL_INVALID_DEVICE " + "when: \"device is not " + "a valid device\"", + TEST_FAIL); + } + + cl_platform_id platform = getPlatformFromDevice(device); + // Initialise timer_resolution to a Non-0 value as CL2.1/2 devices must + // support timer synchronisation + cl_ulong timer_resolution = 1; + auto device_version = get_device_cl_version(device); + err = + clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION, + sizeof(timer_resolution), &timer_resolution, nullptr); + test_error(err, "clGetPlatformInfo failed"); + if (timer_resolution == 0 + && (device_version == Version(2, 1) || device_version == Version(2, 2))) + { + log_error("Support for device and host timer synchronization is " + "required for platforms supporting OpenCL 2.1 or 2.2."); + return TEST_FAIL; + } + + if (timer_resolution != 0) + { + log_info("Platform Supports Timers\n"); + log_info("Skipping CL_INVALID_OPERATION tests\n"); + + err = clGetHostTimer(device, nullptr); + test_failure_error_ret(err, CL_INVALID_VALUE, + "clGetHostTimer should return CL_INVALID_VALUE " + "when: \"host_timestamp is NULL\"", + TEST_FAIL); + } + else + { + log_info("Platform does not Support Timers\n"); + log_info("Skipping CL_INVALID_VALUE tests\n"); + + err = clGetHostTimer(device, &host_timestamp); + test_failure_error_ret( + err, CL_INVALID_OPERATION, + "clGetHostTimer should return CL_INVALID_OPERATION when: \"the " + "platform associated with device does not support device and host " + "timer synchronization\"", + TEST_FAIL); + } + + return TEST_PASS; +} + +/* Negative Tests for clCreateSubDevices */ +enum SupportedPartitionSchemes +{ + None = 0, + Equally = 1 << 0, + Counts = 1 << 1, + Affinity = 1 << 2, + All_Schemes = Affinity | Counts | Equally, +}; + +static int get_supported_properties(cl_device_id device) +{ + size_t number_of_properties = 0; + int err = clGetDeviceInfo(device, CL_DEVICE_PARTITION_PROPERTIES, 0, + nullptr, &number_of_properties); + test_error(err, "clGetDeviceInfo"); + std::vector supported_properties( + number_of_properties / sizeof(cl_device_partition_property)); + err = clGetDeviceInfo(device, CL_DEVICE_PARTITION_PROPERTIES, + number_of_properties, &supported_properties.front(), + nullptr); + test_error(err, "clGetDeviceInfo"); + int ret = SupportedPartitionSchemes::None; + for (auto property : supported_properties) + { + switch (property) + { + case CL_DEVICE_PARTITION_EQUALLY: + ret |= SupportedPartitionSchemes::Equally; + break; + case CL_DEVICE_PARTITION_BY_COUNTS: + ret |= SupportedPartitionSchemes::Counts; + break; + case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: + ret |= SupportedPartitionSchemes::Affinity; + break; + default: break; + } + } + return ret; +} + +static std::vector +get_invalid_properties(int unsupported_properties) +{ + if (unsupported_properties & SupportedPartitionSchemes::Equally) + { + return { CL_DEVICE_PARTITION_EQUALLY, 1, 0 }; + } + else if (unsupported_properties & SupportedPartitionSchemes::Counts) + { + return { CL_DEVICE_PARTITION_BY_COUNTS, 1, + CL_DEVICE_PARTITION_BY_COUNTS_LIST_END }; + } + else if (unsupported_properties & SupportedPartitionSchemes::Affinity) + { + return { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, + CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0 }; + } + else + { + return {}; + } +} + +static cl_uint get_uint_device_info(const cl_device_id device, + const cl_device_info param_name) +{ + cl_uint ret = 0; + cl_int err = + clGetDeviceInfo(device, param_name, sizeof(ret), &ret, nullptr); + test_error(err, "clGetDeviceInfo"); + return ret; +} + +REGISTER_TEST_VERSION(negative_create_sub_devices, Version(1, 2)) +{ + int supported_properties = get_supported_properties(device); + if (supported_properties == SupportedPartitionSchemes::None) + { + printf("Device does not support creating subdevices... Skipping\n"); + return TEST_SKIPPED_ITSELF; + } + + cl_device_partition_property properties[4] = {}; + cl_uint max_compute_units = + get_uint_device_info(device, CL_DEVICE_MAX_COMPUTE_UNITS); + cl_uint max_sub_devices = + get_uint_device_info(device, CL_DEVICE_PARTITION_MAX_SUB_DEVICES); + std::vector out_devices; + cl_uint max_for_partition = 0; + if (supported_properties & SupportedPartitionSchemes::Equally) + { + properties[0] = CL_DEVICE_PARTITION_EQUALLY; + properties[1] = 1; + properties[2] = 0; + out_devices.resize(static_cast(max_compute_units)); + max_for_partition = max_compute_units; + } + else if (supported_properties & SupportedPartitionSchemes::Counts) + { + properties[0] = CL_DEVICE_PARTITION_BY_COUNTS; + properties[1] = 1; + properties[2] = CL_DEVICE_PARTITION_BY_COUNTS_LIST_END; + out_devices.resize(static_cast(max_sub_devices)); + max_for_partition = max_sub_devices; + } + else + { + properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + properties[1] = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; + properties[2] = 0; + } + + properties[3] = 0; + + cl_int err(CL_SUCCESS); + for (auto invalid_device : get_invalid_objects(device)) + { + err = clCreateSubDevices(invalid_device, properties, out_devices.size(), + out_devices.data(), nullptr); + test_failure_error_ret(err, CL_INVALID_DEVICE, + "clCreateSubDevices should return " + "CL_INVALID_DEVICE when: \"in_device " + "is not a valid device\"", + TEST_FAIL); + } + + err = clCreateSubDevices(device, nullptr, out_devices.size(), + out_devices.data(), nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clCreateSubDevices should return CL_INVALID_VALUE when: \"values " + "specified in properties are not valid\" using a nullptr", + TEST_FAIL); + + err = + clCreateSubDevices(device, properties, 0, out_devices.data(), nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clCreateSubDevices should return CL_INVALID_VALUE when: \"out_devices " + "is not NULL and num_devices is less than the number of sub-devices " + "created by the partition scheme\"", + TEST_FAIL); + + if (supported_properties != SupportedPartitionSchemes::All_Schemes) + { + std::vector invalid_properties = + get_invalid_properties(supported_properties + ^ SupportedPartitionSchemes::All_Schemes); + err = + clCreateSubDevices(device, invalid_properties.data(), + out_devices.size(), out_devices.data(), nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clCreateSubDevices should return CL_INVALID_VALUE when: \"values " + "specified in properties are valid but not supported by the " + "device\"", + TEST_FAIL); + } + + if (supported_properties & SupportedPartitionSchemes::Equally) + { + properties[1] = max_compute_units; + err = clCreateSubDevices(device, properties, max_for_partition, + out_devices.data(), nullptr); + test_failure_error_ret( + err, CL_DEVICE_PARTITION_FAILED, + "clCreateSubDevices should return CL_DEVICE_PARTITION_FAILED when: " + "\"the partition name is supported by the implementation but " + "in_device could not be further partitioned\"", + TEST_FAIL); + } + + constexpr cl_device_partition_property INVALID_PARTITION_PROPERTY = + -1; // Aribitrary Invalid number + properties[0] = INVALID_PARTITION_PROPERTY; + err = clCreateSubDevices(device, properties, out_devices.size(), + out_devices.data(), nullptr); + test_failure_error_ret( + err, CL_INVALID_VALUE, + "clCreateSubDevices should return CL_INVALID_VALUE when: \"values " + "specified in properties are not valid\" using an invalid property", + TEST_FAIL); + + if (supported_properties & SupportedPartitionSchemes::Counts) + { + properties[0] = CL_DEVICE_PARTITION_BY_COUNTS; + properties[1] = max_sub_devices + 1; + err = clCreateSubDevices(device, properties, max_sub_devices + 1, + out_devices.data(), nullptr); + test_failure_error_ret( + err, CL_INVALID_DEVICE_PARTITION_COUNT, + "clCreateSubDevices should return " + "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the partition name " + "specified in properties is CL_DEVICE_ PARTITION_BY_COUNTS and the " + "number of sub-devices requested exceeds " + "CL_DEVICE_PARTITION_MAX_SUB_DEVICES\"", + TEST_FAIL); + + properties[1] = -1; + err = clCreateSubDevices(device, properties, out_devices.size(), + out_devices.data(), nullptr); + test_failure_error_ret( + err, CL_INVALID_DEVICE_PARTITION_COUNT, + "clCreateSubDevices should return " + "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the number of compute " + "units requested for one or more sub-devices is less than zero\"", + TEST_FAIL); + } + + if (supported_properties & SupportedPartitionSchemes::Equally) + { + properties[0] = CL_DEVICE_PARTITION_EQUALLY; + properties[1] = max_compute_units + 1; + err = clCreateSubDevices(device, properties, max_compute_units + 1, + out_devices.data(), nullptr); + test_failure_error_ret( + err, CL_INVALID_DEVICE_PARTITION_COUNT, + "clCreateSubDevices should return " + "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the total number of " + "compute units requested exceeds CL_DEVICE_MAX_COMPUTE_UNITS for " + "in_device\"", + TEST_FAIL); + } + + return TEST_PASS; +} + +/* Negative Tests for clRetainDevice */ +REGISTER_TEST_VERSION(negative_retain_device, Version(1, 2)) +{ + cl_int err(CL_SUCCESS); + for (auto invalid_device : get_invalid_objects(device)) + { + err = clRetainDevice(invalid_device); + test_failure_error_ret(err, CL_INVALID_DEVICE, + "clRetainDevice should return CL_INVALID_DEVICE " + "when: \"device is not " + "a valid device\"", + TEST_FAIL); + } + + return TEST_PASS; +} + +/* Negative Tests for clReleaseDevice */ +REGISTER_TEST_VERSION(negative_release_device, Version(1, 2)) +{ + cl_int err(CL_SUCCESS); + for (auto invalid_device : get_invalid_objects(device)) + { + err = clReleaseDevice(invalid_device); + test_failure_error_ret(err, CL_INVALID_DEVICE, + "clReleaseDevice should return " + "CL_INVALID_DEVICE when: \"device is not " + "a valid device\"", + TEST_FAIL); + } + + return TEST_PASS; +} diff --git a/test_conformance/api/negative_enqueue_marker.cpp b/test_conformance/api/negative_enqueue_marker.cpp new file mode 100644 index 00000000..182b3dbf --- /dev/null +++ b/test_conformance/api/negative_enqueue_marker.cpp @@ -0,0 +1,97 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "testBase.h" +#include "harness/typeWrappers.h" + +REGISTER_TEST(negative_enqueue_marker_with_wait_list) +{ + cl_platform_id platform = getPlatformFromDevice(device); + cl_context_properties props[3] = { + CL_CONTEXT_PLATFORM, reinterpret_cast(platform), + 0 + }; + + cl_int err = CL_SUCCESS; + clContextWrapper ctx = + clCreateContext(props, 1, &device, nullptr, nullptr, &err); + test_error(err, "clCreateContext failed"); + + cl_event ret_event = nullptr; + + err = clEnqueueMarkerWithWaitList(nullptr, 0, nullptr, &ret_event); + test_failure_error_ret(err, CL_INVALID_COMMAND_QUEUE, + "clEnqueueMarkerWithWaitList should return " + "CL_INVALID_COMMAND_QUEUE when: \"command_queue is " + "not a valid host command-queue\" using a nullptr", + TEST_FAIL); + test_assert_error(ret_event == nullptr, + "if clEnqueueMarkerWithWaitList failed, no ret_event " + "should be created"); + + clEventWrapper different_ctx_event = clCreateUserEvent(ctx, &err); + test_error(err, "clCreateUserEvent failed"); + + err = + clEnqueueMarkerWithWaitList(queue, 1, &different_ctx_event, &ret_event); + test_failure_error_ret( + err, CL_INVALID_CONTEXT, + "clEnqueueMarkerWithWaitList should return CL_INVALID_CONTEXT when: " + "\"The context of both the command queue and the events in ret_event " + "wait list are not the same\"", + TEST_FAIL); + test_assert_error(ret_event == nullptr, + "if clEnqueueMarkerWithWaitList failed, no ret_event " + "should be created"); + + err = clEnqueueMarkerWithWaitList(queue, 1, nullptr, &ret_event); + test_failure_error_ret( + err, CL_INVALID_EVENT_WAIT_LIST, + "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST " + "when: \"num_events_in_wait_list > 0 but event_wait_list is NULL\"", + TEST_FAIL); + test_assert_error(ret_event == nullptr, + "if clEnqueueMarkerWithWaitList failed, no ret_event " + "should be created"); + + + clEventWrapper event = clCreateUserEvent(context, &err); + test_error(err, "clCreateUserEvent failed"); + + err = clEnqueueMarkerWithWaitList(queue, 0, &event, &ret_event); + test_failure_error_ret( + err, CL_INVALID_EVENT_WAIT_LIST, + "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST " + "when: \"num_events_in_wait_list is 0 but event_wait_list is not " + "NULL\"", + TEST_FAIL); + test_assert_error(ret_event == nullptr, + "if clEnqueueMarkerWithWaitList failed, no ret_event " + "should be created"); + + cl_event invalid_event_wait_list[] = { nullptr }; + err = clEnqueueMarkerWithWaitList(queue, 1, invalid_event_wait_list, + &ret_event); + test_failure_error_ret( + err, CL_INVALID_EVENT_WAIT_LIST, + "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST " + "when: \"event objects in event_wait_list are not valid events\"", + TEST_FAIL); + test_assert_error(ret_event == nullptr, + "if clEnqueueMarkerWithWaitList failed, no ret_event " + "should be created"); + + return TEST_PASS; +} diff --git a/test_conformance/api/negative_platform.cpp b/test_conformance/api/negative_platform.cpp index f98ec13a..6c5922c5 100644 --- a/test_conformance/api/negative_platform.cpp +++ b/test_conformance/api/negative_platform.cpp @@ -40,9 +40,20 @@ REGISTER_TEST(negative_get_platform_info) { cl_platform_id platform = getPlatformFromDevice(device); + cl_int err(CL_SUCCESS); + for (auto invalid_platform : get_invalid_objects(device)) + { + err = clGetPlatformInfo(invalid_platform, CL_PLATFORM_VERSION, + sizeof(char*), nullptr, nullptr); + test_failure_error_ret(err, CL_INVALID_PLATFORM, + "clGetPlatformInfo should return " + "CL_INVALID_PLATFORM when: \"platform " + "is not a valid platform\"", + TEST_FAIL); + } + constexpr cl_platform_info INVALID_PARAM_VALUE = 0; - cl_int err = - clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr); + err = clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr); test_failure_error_ret( err, CL_INVALID_VALUE, "clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name " diff --git a/test_conformance/api/negative_queue.cpp b/test_conformance/api/negative_queue.cpp index 7da68b32..8959ca0b 100644 --- a/test_conformance/api/negative_queue.cpp +++ b/test_conformance/api/negative_queue.cpp @@ -349,24 +349,34 @@ REGISTER_TEST_VERSION(negative_set_default_device_command_queue, Version(2, 1)) } { - constexpr cl_queue_properties props[] = { - CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 - }; - clCommandQueueWrapper not_on_device_queue = - clCreateCommandQueueWithProperties(context, device, props, &err); - test_error_fail(err, "clCreateCommandQueueWithProperties failed"); - err = clSetDefaultDeviceCommandQueue(context, device, - not_on_device_queue); - if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE) + cl_command_queue_properties queue_properties; + err = + clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, + sizeof(queue_properties), &queue_properties, NULL); + test_error(err, "Unable to query CL_DEVICE_QUEUE_PROPERTIES"); + if (queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { - log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n", - "clSetDefaultDeviceCommandQueue should return " - "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: " - "\"command_queue is not a valid command-queue for " - "device\" using a command queue that is not on device", - IGetErrorString(err), - "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE", - __FILE__, __LINE__); + constexpr cl_queue_properties props[] = { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 + }; + clCommandQueueWrapper not_on_device_queue = + clCreateCommandQueueWithProperties(context, device, props, + &err); + test_error_fail(err, "clCreateCommandQueueWithProperties failed"); + err = clSetDefaultDeviceCommandQueue(context, device, + not_on_device_queue); + if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE) + { + log_error( + "ERROR: %s! (Got %s, expected (%s) from %s:%d)\n", + "clSetDefaultDeviceCommandQueue should return " + "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: " + "\"command_queue is not a valid command-queue for " + "device\" using a command queue that is not on device", + IGetErrorString(err), + "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE", + __FILE__, __LINE__); + } } } diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index 29677623..fd8c3ee5 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -1526,7 +1526,7 @@ REGISTER_TEST(min_max_constant_buffer_size) size_t threads[1], localThreads[1]; cl_int *constantData, *resultData; cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize; - int i; + size_t i; cl_event event; cl_int event_status; MTdata d; @@ -1556,6 +1556,8 @@ REGISTER_TEST(min_max_constant_buffer_size) maxAllocSize = get_device_info_max_mem_alloc_size( device, MAX_DEVICE_MEMORY_SIZE_DIVISOR); + log_info("Reported max alloc size of %" PRIu64 " bytes.\n", + (uint64_t)maxAllocSize); if (maxSize > maxAllocSize) maxSize = maxAllocSize; @@ -1590,7 +1592,7 @@ REGISTER_TEST(min_max_constant_buffer_size) return EXIT_FAILURE; } - for (i = 0; i < (int)(numberOfInts); i++) + for (i = 0; i < numberOfInts; i++) constantData[i] = (int)genrand_int32(d); clMemWrapper streams[3]; @@ -1678,11 +1680,11 @@ REGISTER_TEST(min_max_constant_buffer_size) sizeToAllocate, resultData, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed"); - for (i = 0; i < (int)(numberOfInts); i++) + for (i = 0; i < numberOfInts; i++) if (constantData[i] != resultData[i]) { - log_error("Data failed to verify: constantData[%d]=%d != " - "resultData[%d]=%d\n", + log_error("Data failed to verify: constantData[%zu]=%d != " + "resultData[%zu]=%d\n", i, constantData[i], i, resultData[i]); free(constantData); free(resultData); diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp index 90b302b2..47b151b0 100644 --- a/test_conformance/api/test_kernel_arg_info.cpp +++ b/test_conformance/api/test_kernel_arg_info.cpp @@ -487,10 +487,10 @@ compare_kernel_with_expected(cl_context context, cl_device_id device, int failed_tests = 0; clKernelWrapper kernel; clProgramWrapper program; - cl_int err = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info", - get_build_options(device).c_str()); - test_error(err, "create_single_kernel_helper_with_build_options"); + cl_int err = create_single_kernel_helper(context, &program, &kernel, 1, + &kernel_src, "get_kernel_arg_info", + get_build_options(device).c_str()); + test_error(err, "create_single_kernel_helper"); for (size_t i = 0; i < expected_args.size(); ++i) { KernelArgInfo actual; @@ -874,11 +874,10 @@ static int test_null_param(cl_context context, cl_device_id device, { clProgramWrapper program; clKernelWrapper kernel; - cl_int err = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info", - get_build_options(device).c_str()); - test_error_ret(err, "create_single_kernel_helper_with_build_options", - TEST_FAIL); + cl_int err = create_single_kernel_helper(context, &program, &kernel, 1, + &kernel_src, "get_kernel_arg_info", + get_build_options(device).c_str()); + test_error_ret(err, "create_single_kernel_helper", TEST_FAIL); err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, CL_KERNEL_ARG_ADDRESS_QUALIFIER, 0, nullptr, @@ -916,12 +915,11 @@ static int test_arg_name_size(cl_context context, cl_device_id device, char arg_return[sizeof(KERNEL_ARGUMENT_NAME) + 1]; clProgramWrapper program; clKernelWrapper kernel; - cl_int err = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info", - get_build_options(device).c_str()); + cl_int err = create_single_kernel_helper(context, &program, &kernel, 1, + &kernel_src, "get_kernel_arg_info", + get_build_options(device).c_str()); - test_error_ret(err, "create_single_kernel_helper_with_build_options", - TEST_FAIL); + test_error_ret(err, "create_single_kernel_helper", TEST_FAIL); err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, CL_KERNEL_ARG_NAME, diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp index 86b3595c..fadae00a 100644 --- a/test_conformance/api/test_kernel_attributes.cpp +++ b/test_conformance/api/test_kernel_attributes.cpp @@ -382,17 +382,18 @@ REGISTER_TEST(null_required_work_group_size) clMemWrapper dst; dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int), nullptr, &error); + test_error(error, "clCreateBuffer failed"); struct KernelAttribInfo { - std::string str; - cl_uint max_dim; + cl_int wgs[3]; + cl_uint min_dim; }; std::vector attribs; - attribs.push_back({ "__attribute__((reqd_work_group_size(2,1,1)))", 1 }); - attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,1)))", 2 }); - attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,4)))", 3 }); + attribs.push_back({ { 2, 1, 1 }, 1 }); + attribs.push_back({ { 2, 3, 1 }, 2 }); + attribs.push_back({ { 2, 3, 4 }, 3 }); const std::string body_str = R"( __kernel void wg_size(__global int* dst) @@ -409,7 +410,11 @@ REGISTER_TEST(null_required_work_group_size) for (auto& attrib : attribs) { - const std::string source_str = attrib.str + body_str; + const std::string attrib_str = "__attribute__((reqd_work_group_size(" + + std::to_string(attrib.wgs[0]) + "," + + std::to_string(attrib.wgs[1]) + "," + + std::to_string(attrib.wgs[2]) + ")))"; + const std::string source_str = attrib_str + body_str; const char* source = source_str.c_str(); clProgramWrapper program; @@ -421,21 +426,19 @@ REGISTER_TEST(null_required_work_group_size) error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst); test_error(error, "clSetKernelArg failed"); - for (cl_uint work_dim = 1; work_dim <= attrib.max_dim; work_dim++) + for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++) { - const cl_int expected[3] = { 2, work_dim >= 2 ? 3 : 1, - work_dim >= 3 ? 4 : 1 }; const size_t test_work_group_size = - expected[0] * expected[1] * expected[2]; - if ((size_t)expected[0] > device_max_work_item_sizes[0] - || (size_t)expected[1] > device_max_work_item_sizes[1] - || (size_t)expected[2] > device_max_work_item_sizes[2] + attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2]; + if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0] + || (size_t)attrib.wgs[1] > device_max_work_item_sizes[1] + || (size_t)attrib.wgs[2] > device_max_work_item_sizes[2] || test_work_group_size > device_max_work_group_size) { log_info("Skipping test for work_dim = %u: required work group " "size (%i, %i, %i) (total %zu) exceeds device max " "work group size (%zu, %zu, %zu) (total %zu)\n", - work_dim, expected[0], expected[1], expected[2], + work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2], test_work_group_size, device_max_work_item_sizes[0], device_max_work_item_sizes[1], device_max_work_item_sizes[2], @@ -444,8 +447,10 @@ REGISTER_TEST(null_required_work_group_size) } const cl_int zero = 0; - error = clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0, - sizeof(expected), 0, nullptr, nullptr); + error = + clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0, + sizeof(attrib.wgs), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 }; error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr, @@ -458,12 +463,12 @@ REGISTER_TEST(null_required_work_group_size) results, 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); - if (results[0] != expected[0] || results[1] != expected[1] - || results[2] != expected[2]) + if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1] + || results[2] != attrib.wgs[2]) { log_error("Executed local size mismatch with work_dim = %u: " "Expected (%d,%d,%d) got (%d,%d,%d)\n", - work_dim, expected[0], expected[1], expected[2], + work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2], results[0], results[1], results[2]); return TEST_FAIL; } @@ -477,15 +482,15 @@ REGISTER_TEST(null_required_work_group_size) test_error(error, "clGetKernelSuggestedLocalWorkSizeKHR failed"); - if ((cl_int)suggested[0] != expected[0] - || (cl_int)suggested[1] != expected[1] - || (cl_int)suggested[2] != expected[2]) + if (suggested[0] != (size_t)attrib.wgs[0] + || suggested[1] != (size_t)attrib.wgs[1] + || suggested[2] != (size_t)attrib.wgs[2]) { log_error("Suggested local size mismatch with work_dim = " - "%u: Expected (%d,%d,%d) got (%d,%d,%d)\n", - work_dim, expected[0], expected[1], expected[2], - (cl_int)suggested[0], (cl_int)suggested[1], - (cl_int)suggested[2]); + "%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n", + work_dim, attrib.wgs[0], attrib.wgs[1], + attrib.wgs[2], suggested[0], suggested[1], + suggested[2]); return TEST_FAIL; } } diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp index 47100b7a..d0dc116f 100644 --- a/test_conformance/api/test_kernels.cpp +++ b/test_conformance/api/test_kernels.cpp @@ -982,6 +982,12 @@ REGISTER_TEST(negative_invalid_arg_index) REGISTER_TEST(negative_invalid_arg_size_local) { + if (true) + { + log_info("Disabling this test temporarily, see internal issue 374.\n"); + return TEST_SKIPPED_ITSELF; + } + cl_int error = CL_SUCCESS; clProgramWrapper program; clKernelWrapper local_arg_kernel; diff --git a/test_conformance/api/test_queue_hint.cpp b/test_conformance/api/test_queue_hint.cpp index 89769d7e..21df4341 100644 --- a/test_conformance/api/test_queue_hint.cpp +++ b/test_conformance/api/test_queue_hint.cpp @@ -86,7 +86,8 @@ REGISTER_TEST(queue_hint) clProgramWrapper program; clKernelWrapper kernel; - err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, queue_hint_test_kernel, "vec_cpy", NULL); + err = create_single_kernel_helper(context, &program, &kernel, 1, + queue_hint_test_kernel, "vec_cpy"); if (err != 0) { return err; diff --git a/test_conformance/api/test_spirv_queries.cpp b/test_conformance/api/test_spirv_queries.cpp index 720f73b1..d1536e0b 100644 --- a/test_conformance/api/test_spirv_queries.cpp +++ b/test_conformance/api/test_spirv_queries.cpp @@ -757,7 +757,7 @@ REGISTER_TEST(spirv_query_dependencies) } for (const auto& extension_dep : it->second.extensions) { - log_error("Checked for SPIR-V extension %s.n", + log_error("Checked for SPIR-V extension %s.\n", extension_dep.c_str()); } return TEST_FAIL; diff --git a/test_conformance/basic/test_arrayimagecopy.cpp b/test_conformance/basic/test_arrayimagecopy.cpp index bb44abff..16b24390 100644 --- a/test_conformance/basic/test_arrayimagecopy.cpp +++ b/test_conformance/basic/test_arrayimagecopy.cpp @@ -188,9 +188,17 @@ REGISTER_TEST(arrayimagecopy) { PASSIVE_REQUIRE_IMAGE_SUPPORT(device) - return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE, - CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, - test_arrayimagecopy_single_format); + int error = test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE, + CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, + test_arrayimagecopy_single_format); + if (is_extension_available(device, "cl_ext_immutable_memory_objects")) + { + error |= test_arrayimagecommon( + device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, + test_arrayimagecopy_single_format); + } + return error; } @@ -198,7 +206,15 @@ REGISTER_TEST(arrayimagecopy3d) { PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device) - return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE, - CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, - test_arrayimagecopy_single_format); + int error = test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE, + CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, + test_arrayimagecopy_single_format); + if (is_extension_available(device, "cl_ext_immutable_memory_objects")) + { + error |= test_arrayimagecommon( + device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, + test_arrayimagecopy_single_format); + } + return error; } diff --git a/test_conformance/basic/test_arrayreadwrite.cpp b/test_conformance/basic/test_arrayreadwrite.cpp index fe4bb995..4b0555fd 100644 --- a/test_conformance/basic/test_arrayreadwrite.cpp +++ b/test_conformance/basic/test_arrayreadwrite.cpp @@ -72,16 +72,36 @@ static int test_arrayreadwrite_impl(cl_device_id device, cl_context context, err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, offset * sizeof(cl_uint), sizeof(cl_uint) * cb, &reference_vals[offset], 0, nullptr, nullptr); - test_error(err, "clEnqueueWriteBuffer failed"); + if (flags & CL_MEM_IMMUTABLE_EXT) + { + test_failure_error_ret(err, CL_INVALID_OPERATION, + "clEnqueueWriteBuffer is expected to fail " + "with CL_INVALID_OPERATION when the buffer " + "is created with CL_MEM_IMMUTABLE_EXT", + TEST_FAIL); + } + else + { + test_error(err, "clEnqueueWriteBuffer failed"); + } err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, offset * sizeof(cl_uint), cb * sizeof(cl_uint), &outptr[offset], 0, nullptr, nullptr); test_error(err, "clEnqueueReadBuffer failed"); + const cl_uint* expected_buffer_values = nullptr; + if (flags & CL_MEM_IMMUTABLE_EXT) + { + expected_buffer_values = inptr.data(); + } + else + { + expected_buffer_values = reference_vals.data(); + } for (int j = offset; j < offset + cb; j++) { - if (reference_vals[j] != outptr[j]) + if (expected_buffer_values[j] != outptr[j]) { log_error("ARRAY read, write test failed\n"); err = -1; @@ -105,3 +125,11 @@ REGISTER_TEST(arrayreadwrite) return test_arrayreadwrite_impl(device, context, queue, num_elements, CL_MEM_READ_WRITE); } + +REGISTER_TEST(immutable_arrayreadwrite) +{ + REQUIRE_EXTENSION("cl_ext_immutable_memory_objects"); + + return test_arrayreadwrite_impl(device, context, queue, num_elements, + CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR); +} diff --git a/test_conformance/basic/test_bufferreadwriterect.cpp b/test_conformance/basic/test_bufferreadwriterect.cpp index 883bff7c..03ba2706 100644 --- a/test_conformance/basic/test_bufferreadwriterect.cpp +++ b/test_conformance/basic/test_bufferreadwriterect.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "harness/compat.h" +#include "errorHelpers.h" #include #include @@ -194,6 +195,43 @@ int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, si return 0; } +int immutable_copy_region(size_t src, size_t soffset[3], size_t sregion[3], + size_t dst, size_t doffset[3], size_t dregion[3]) +{ + + // Copy between cl buffers. + size_t src_slice_pitch = + (width[src] * height[src] != 1) ? width[src] * height[src] : 0; + size_t dst_slice_pitch = + (width[dst] * height[dst] != 1) ? width[dst] * height[dst] : 0; + size_t src_row_pitch = width[src]; + + cl_int err; + if (check_overlap_rect(soffset, doffset, sregion, src_row_pitch, + src_slice_pitch)) + { + log_info("Copy overlap reported, skipping copy buffer rect\n"); + return CL_SUCCESS; + } + else + { + err = clEnqueueCopyBufferRect(gQueue, buffer[src], buffer[dst], soffset, + doffset, sregion, /*dregion,*/ + width[src], src_slice_pitch, width[dst], + dst_slice_pitch, 0, nullptr, nullptr); + if (err != CL_INVALID_OPERATION) + { + log_error( + "clEnqueueCopyBufferRect should return " + "CL_INVALID_OPERATION but returned %s between %zu and %zu", + IGetErrorString(err), src, dst); + return TEST_FAIL; + } + } + + return TEST_PASS; +} + // This function compares the destination region in the buffer pointed // to by device, to the source region of the specified verify buffer. int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) { @@ -337,6 +375,32 @@ int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, s return 0; } +int immutable_write_region(size_t src, size_t soffset[3], size_t sregion[3], + size_t dst, size_t doffset[3], size_t dregion[3]) +{ + initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt); + + size_t src_slice_pitch = + (width[src] * height[src] != 1) ? width[src] * height[src] : 0; + size_t dst_slice_pitch = + (width[dst] * height[dst] != 1) ? width[dst] * height[dst] : 0; + + cl_int error = clEnqueueWriteBufferRect( + gQueue, buffer[dst], CL_TRUE, doffset, soffset, dregion, width[dst], + dst_slice_pitch, width[src], src_slice_pitch, tmp_buffer, 0, nullptr, + nullptr); + + if (error != CL_INVALID_OPERATION) + { + log_error("clEnqueueWriteBufferRect should return CL_INVALID_OPERATION " + "but retured %s between %zu and %zu", + IGetErrorString(error), src, dst); + return TEST_FAIL; + } + + return TEST_PASS; +} + void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data ) { free( data ); @@ -591,3 +655,16 @@ REGISTER_TEST(bufferreadwriterect) device, context, queue, num_elements, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, test_functions); } + +REGISTER_TEST(immutable_bufferreadwriterect) +{ + REQUIRE_EXTENSION("cl_ext_immutable_memory_objects"); + + TestFunctions test_functions; + test_functions.copy = immutable_copy_region; + test_functions.read = read_verify_region; + test_functions.write = immutable_write_region; + return test_bufferreadwriterect_impl( + device, context, queue, num_elements, + CL_MEM_USE_HOST_PTR | CL_MEM_IMMUTABLE_EXT, test_functions); +} diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp index 47123120..685982d1 100644 --- a/test_conformance/basic/test_enqueued_local_size.cpp +++ b/test_conformance/basic/test_enqueued_local_size.cpp @@ -103,11 +103,11 @@ REGISTER_TEST_VERSION(enqueued_local_size, Version(2, 0)) std::string cl_std = "-cl-std=CL"; cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0"; - err = create_single_kernel_helper_with_build_options( + err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code, "test_enqueued_local_size_1d", cl_std.c_str()); test_error(err, "create_single_kernel_helper failed"); - err = create_single_kernel_helper_with_build_options( + err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code, "test_enqueued_local_size_2d", cl_std.c_str()); test_error(err, "create_single_kernel_helper failed"); diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp index a400c460..d0ce67a1 100644 --- a/test_conformance/basic/test_imagearraycopy.cpp +++ b/test_conformance/basic/test_imagearraycopy.cpp @@ -27,6 +27,82 @@ using test_function_t = int (*)(cl_device_id, cl_context, cl_command_queue, cl_mem_flags, cl_mem_flags, cl_mem_object_type, const cl_image_format *); +static int test_negative_imagearraycopy_single_format( + cl_device_id device, cl_context context, cl_command_queue queue, + cl_mem_flags image_flags, cl_mem_flags buffer_flags, + cl_mem_object_type image_type, const cl_image_format *format) +{ + std::unique_ptr bufptr{ nullptr, free }, + imgptr{ nullptr, free }; + clMemWrapper image; + clMemWrapper buffer; + const size_t img_width = 512; + const size_t img_height = 512; + const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1; + size_t elem_size; + size_t buffer_size; + cl_int err; + RandomSeed seed(gRandomSeed); + + const size_t origin[3] = { 0, 0, 0 }, + region[3] = { img_width, img_height, img_depth }; + + log_info("Testing %s %s\n", + GetChannelOrderName(format->image_channel_order), + GetChannelTypeName(format->image_channel_data_type)); + + elem_size = get_pixel_size(format); + buffer_size = + sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth; + + if (image_flags & CL_MEM_USE_HOST_PTR || image_flags & CL_MEM_COPY_HOST_PTR) + { + imgptr.reset(static_cast( + create_random_data(kUChar, seed, buffer_size))); + } + + bufptr.reset( + static_cast(create_random_data(kUChar, seed, buffer_size))); + + if (CL_MEM_OBJECT_IMAGE2D == image_type) + { + image = create_image_2d(context, image_flags, format, img_width, + img_height, 0, imgptr.get(), &err); + } + else + { + image = + create_image_3d(context, image_flags, format, img_width, img_height, + img_depth, 0, 0, imgptr.get(), &err); + } + test_error(err, "create_image_xd failed"); + + if (!(image_flags & CL_MEM_USE_HOST_PTR + || image_flags & CL_MEM_COPY_HOST_PTR)) + { + imgptr.reset(static_cast( + create_random_data(kUChar, seed, buffer_size))); + + err = clEnqueueWriteImage(queue, image, CL_TRUE, origin, region, 0, 0, + imgptr.get(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteImage failed"); + } + + buffer = + clCreateBuffer(context, buffer_flags, buffer_size, bufptr.get(), &err); + test_error(err, "clCreateBuffer failed"); + + err = clEnqueueCopyImageToBuffer(queue, image, buffer, origin, region, 0, 0, + nullptr, nullptr); + test_failure_error_ret( + err, CL_INVALID_OPERATION, + "clEnqueueCopyImageToBuffer should return CL_INVALID_OPERATION when: " + "\" dst_buffer is created with CL_MEM_IMMUTABLE_EXT flag\"", + TEST_FAIL); + + return TEST_PASS; +} + static int test_imagearraycopy_single_format( cl_device_id device, cl_context context, cl_command_queue queue, cl_mem_flags image_flags, cl_mem_flags buffer_flags, @@ -188,9 +264,18 @@ REGISTER_TEST(imagearraycopy) { PASSIVE_REQUIRE_IMAGE_SUPPORT(device) - return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE, - CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, - test_imagearraycopy_single_format); + int error = test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE, + CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, + test_imagearraycopy_single_format); + + if (is_extension_available(device, "cl_ext_immutable_memory_objects")) + { + error |= test_imagearraycommon( + device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, + test_imagearraycopy_single_format); + } + return error; } @@ -198,7 +283,38 @@ REGISTER_TEST(imagearraycopy3d) { PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device) - return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY, - CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, - test_imagearraycopy_single_format); + int error = test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY, + CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, + test_imagearraycopy_single_format); + + if (is_extension_available(device, "cl_ext_immutable_memory_objects")) + { + error |= test_imagearraycommon( + device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, + test_imagearraycopy_single_format); + } + return error; +} + +REGISTER_TEST(negative_imagearraycopy) +{ + PASSIVE_REQUIRE_IMAGE_SUPPORT(device); + REQUIRE_EXTENSION("cl_ext_immutable_memory_objects"); + + return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE, + CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR, + CL_MEM_OBJECT_IMAGE2D, + test_negative_imagearraycopy_single_format); +} + +REGISTER_TEST(negative_imagearraycopy3d) +{ + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device); + REQUIRE_EXTENSION("cl_ext_immutable_memory_objects"); + + return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY, + CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR, + CL_MEM_OBJECT_IMAGE3D, + test_negative_imagearraycopy_single_format); } diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp index ac0a84f9..ecd2d9bf 100644 --- a/test_conformance/basic/test_sizeof.cpp +++ b/test_conformance/basic/test_sizeof.cpp @@ -50,8 +50,8 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty { sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; } - cl_int err = create_single_kernel_helper_with_build_options( - context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr); + cl_int err = create_single_kernel_helper(context, &p, &k, 4, + sizeof_kernel_code, "test_sizeof"); test_error(err, "Failed to build kernel/program."); m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err ); diff --git a/test_conformance/buffers/main.cpp b/test_conformance/buffers/main.cpp index f2a8c2a3..496d3b7d 100644 --- a/test_conformance/buffers/main.cpp +++ b/test_conformance/buffers/main.cpp @@ -19,19 +19,24 @@ #include "testBase.h" -const cl_mem_flags flag_set[] = { - CL_MEM_ALLOC_HOST_PTR, - CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, - CL_MEM_USE_HOST_PTR, - CL_MEM_COPY_HOST_PTR, - 0 -}; +const cl_mem_flags flag_set[] = { CL_MEM_ALLOC_HOST_PTR, + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_USE_HOST_PTR, + CL_MEM_COPY_HOST_PTR, + 0, + CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR, + CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR, + CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR + | CL_MEM_ALLOC_HOST_PTR }; const char* flag_set_names[] = { "CL_MEM_ALLOC_HOST_PTR", "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR", "CL_MEM_USE_HOST_PTR", "CL_MEM_COPY_HOST_PTR", - "0" + "0", + "CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR", + "CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR", + "CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR", }; int main( int argc, const char *argv[] ) diff --git a/test_conformance/buffers/testBase.h b/test_conformance/buffers/testBase.h index 8c5bb0e4..4cd17155 100644 --- a/test_conformance/buffers/testBase.h +++ b/test_conformance/buffers/testBase.h @@ -25,6 +25,6 @@ extern const cl_mem_flags flag_set[]; extern const char* flag_set_names[]; -#define NUM_FLAGS 5 +#define NUM_FLAGS 8 #endif // _testBase_h diff --git a/test_conformance/buffers/test_buffer_copy.cpp b/test_conformance/buffers/test_buffer_copy.cpp index 81dbd5cf..cba2c626 100644 --- a/test_conformance/buffers/test_buffer_copy.cpp +++ b/test_conformance/buffers/test_buffer_copy.cpp @@ -39,7 +39,8 @@ static int verify_copy_buffer(int *inptr, int *outptr, int n) using alignedOwningPtr = std::unique_ptr; -static int test_copy( cl_command_queue queue, cl_context context, int num_elements, MTdata d ) +static int test_copy(cl_device_id device, cl_command_queue queue, + cl_context context, int num_elements, MTdata d) { clMemWrapper buffers[2]; cl_int err = CL_SUCCESS; @@ -76,10 +77,19 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen return TEST_FAIL; } + const bool has_immutable_memory_extension = + is_extension_available(device, "cl_ext_immutable_memory_objects"); + for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++) { + if (((flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + || (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)) + && !has_immutable_memory_extension) + { + continue; + } log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]); for (int i = 0; i < num_elements; i++) @@ -89,7 +99,6 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen reference_ptr[i] = (int)genrand_int32(d); } - if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], sizeof(cl_int) * num_elements, @@ -116,7 +125,9 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen return TEST_FAIL; } - if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) { + if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) + && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) + { err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int) * num_elements, reference_ptr.get(), 0, nullptr, @@ -130,11 +141,44 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen err = clEnqueueCopyBuffer(queue, buffers[0], buffers[1], 0, 0, sizeof(cl_int) * num_elements, 0, nullptr, nullptr); - if ( err != CL_SUCCESS ){ + if ((flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)) + { + if (err != CL_INVALID_OPERATION) + { + test_failure_error_ret(err, CL_INVALID_OPERATION, + "clEnqueueCopyBuffer should return " + "CL_INVALID_OPERATION when: " + "\"dst_buffer is created with " + "CL_MEM_IMMUTABLE_EXT flag\"", + TEST_FAIL); + return TEST_FAIL; + } + } + else if (err != CL_SUCCESS) + { print_error(err, "clCopyArray failed\n"); return TEST_FAIL; } + err = clEnqueueReadBuffer(queue, buffers[0], true, 0, + sizeof(int) * num_elements, out_ptr.get(), + 0, nullptr, nullptr); + if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(), + num_elements)) + { + log_error("test failed\n"); + return TEST_FAIL; + } + else + { + log_info("test passed\n"); + } + + // Reset out_ptr + for (int i = 0; i < num_elements; i++) + { + out_ptr[i] = (int)0xdeadbeef; // seed with incorrect data + } err = clEnqueueReadBuffer(queue, buffers[1], true, 0, sizeof(int) * num_elements, out_ptr.get(), 0, nullptr, nullptr); @@ -143,14 +187,20 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen return TEST_FAIL; } - if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(), - num_elements)) + int *target_buffer = reference_ptr.get(); + if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT) { - log_error( " test failed\n" ); + target_buffer = invalid_ptr.get(); + } + + if (verify_copy_buffer(target_buffer, out_ptr.get(), num_elements)) + { + log_error("test failed\n"); return TEST_FAIL; } - else{ - log_info( " test passed\n" ); + else + { + log_info("test passed\n"); } } // dst flags } // src flags @@ -160,7 +210,10 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen } // end test_copy() -static int testPartialCopy( cl_command_queue queue, cl_context context, int num_elements, cl_uint srcStart, cl_uint dstStart, int size, MTdata d ) +static int testPartialCopy(cl_device_id device, cl_command_queue queue, + cl_context context, int num_elements, + cl_uint srcStart, cl_uint dstStart, int size, + MTdata d) { clMemWrapper buffers[2]; cl_int err = CL_SUCCESS; @@ -197,10 +250,19 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_ return TEST_FAIL; } + const bool has_immutable_memory_extension = + is_extension_available(device, "cl_ext_immutable_memory_objects"); + for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++) { + if (((flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + || (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)) + && !has_immutable_memory_extension) + { + continue; + } log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]); for (int i = 0; i < num_elements; i++) @@ -236,7 +298,9 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_ return TEST_FAIL; } - if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)){ + if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) + && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) + { err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int) * num_elements, reference_ptr.get(), 0, nullptr, @@ -251,27 +315,72 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_ queue, buffers[0], buffers[1], srcStart * sizeof(cl_int), dstStart * sizeof(cl_int), sizeof(cl_int) * size, 0, nullptr, nullptr); - if ( err != CL_SUCCESS){ - print_error(err, "clEnqueueCopyBuffer failed\n"); + if ((flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)) + { + if (err != CL_INVALID_OPERATION) + { + test_failure_error_ret(err, CL_INVALID_OPERATION, + "clEnqueueCopyBuffer should return " + "CL_INVALID_OPERATION when: " + "\"dst_buffer is created with " + "CL_MEM_IMMUTABLE_EXT flag\"", + TEST_FAIL); + } + } + else if (err != CL_SUCCESS) + { + print_error(err, "clCopyArray failed\n"); return TEST_FAIL; } + err = clEnqueueReadBuffer(queue, buffers[0], true, 0, + sizeof(int) * num_elements, out_ptr.get(), + 0, nullptr, nullptr); + if (err != CL_SUCCESS) + { + print_error(err, "clEnqueueReadBuffer failed\n"); + return TEST_FAIL; + } + if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(), + num_elements)) + { + log_error("test failed\n"); + return TEST_FAIL; + } + else + { + log_info("test passed\n"); + } + + // Reset out_ptr + for (int i = 0; i < num_elements; i++) + { + out_ptr[i] = (int)0xdeadbeef; // seed with incorrect data + } err = clEnqueueReadBuffer(queue, buffers[1], true, 0, sizeof(int) * num_elements, out_ptr.get(), 0, nullptr, nullptr); - if ( err != CL_SUCCESS){ + if (err != CL_SUCCESS) + { print_error(err, "clEnqueueReadBuffer failed\n"); return TEST_FAIL; } - if (verify_copy_buffer(reference_ptr.get() + srcStart, - out_ptr.get() + dstStart, size)) + cl_int *target_buffer = reference_ptr.get() + srcStart; + if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT) { - log_error("buffer_COPY test failed\n"); + target_buffer = invalid_ptr.get(); + } + + if (verify_copy_buffer(target_buffer, out_ptr.get() + dstStart, + size)) + { + log_error("test failed\n"); return TEST_FAIL; } - else{ - log_info("buffer_COPY test passed\n"); + else + { + log_info("test passed\n"); } } // dst mem flags } // src mem flags @@ -289,7 +398,7 @@ REGISTER_TEST(buffer_copy) // test the preset size log_info( "set size: %d: ", num_elements ); - if (test_copy(queue, context, num_elements, d) != TEST_PASS) + if (test_copy(device, queue, context, num_elements, d) != TEST_PASS) { err++; } @@ -298,7 +407,7 @@ REGISTER_TEST(buffer_copy) for ( i = 0; i < 8; i++ ){ size = (int)get_random_float(2.f,131072.f, d); log_info( "random size: %d: ", size ); - if (test_copy(queue, context, size, d) != TEST_PASS) + if (test_copy(device, queue, context, size, d) != TEST_PASS) { err++; } @@ -324,8 +433,8 @@ REGISTER_TEST(buffer_partial_copy) size = (int)get_random_float( 8.f, (float)(num_elements - srcStart), d ); dstStart = (cl_uint)get_random_float( 0.f, (float)(num_elements - size), d ); log_info( "random partial copy from %d to %d, size: %d: ", (int)srcStart, (int)dstStart, size ); - if (testPartialCopy(queue, context, num_elements, srcStart, dstStart, - size, d) + if (testPartialCopy(device, queue, context, num_elements, srcStart, + dstStart, size, d) != TEST_PASS) { err++; diff --git a/test_conformance/buffers/test_buffer_fill.cpp b/test_conformance/buffers/test_buffer_fill.cpp index 2e7a22de..d8fa7654 100644 --- a/test_conformance/buffers/test_buffer_fill.cpp +++ b/test_conformance/buffers/test_buffer_fill.cpp @@ -598,6 +598,12 @@ static int test_buffer_fill(cl_device_id deviceID, cl_context context, for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { + // Skip immutable memory flags + if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } + clEventWrapper event[2]; clMemWrapper buffers[2]; if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) @@ -721,6 +727,12 @@ REGISTER_TEST(buffer_fill_struct) for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { + // Skip immutable memory flags + if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } + clProgramWrapper program; clKernelWrapper kernel; log_info("Testing with cl_mem_flags: %s\n", diff --git a/test_conformance/buffers/test_buffer_map.cpp b/test_conformance/buffers/test_buffer_map.cpp index 5cac90ab..3299902c 100644 --- a/test_conformance/buffers/test_buffer_map.cpp +++ b/test_conformance/buffers/test_buffer_map.cpp @@ -592,6 +592,12 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { + // Skip immutable memory flags + if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } + clMemWrapper buffer; outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment); if ( ! outptr[i] ){ @@ -671,6 +677,101 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c } // end test_buffer_map_read() +int test_immutable_buffer_map(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + size_t size, const char *type, int loops) +{ + REQUIRE_EXTENSION("cl_ext_immutable_memory_objects"); + + void *outptr[5]; + cl_int err; + int i; + size_t ptrSizes[5]; + int total_errors = 0; + MTdataHolder mtdata(gRandomSeed); + + size_t min_alignment = get_min_alignment(context); + + ptrSizes[0] = size; + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + // embedded devices don't support long/ulong so skip over + if (!gHasLong && strstr(type, "long")) return TEST_SKIPPED_ITSELF; + + for (i = 0; i < loops; i++) + { + for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) + { + // Testing writing from immutable flags + if (!(flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)) + { + continue; + } + + clMemWrapper buffer; + outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment); + if (!outptr[i]) + { + log_error(" unable to allocate %d bytes of memory\n", + (int)ptrSizes[i] * num_elements); + return TEST_FAIL; + } + generate_random_data(kUChar, ptrSizes[i] * num_elements, mtdata, + outptr[i]); + + buffer = + clCreateBuffer(context, flag_set[src_flag_id], + ptrSizes[i] * num_elements, outptr[i], &err); + + if (nullptr == buffer || CL_SUCCESS != err) + { + print_error(err, "clCreateBuffer failed\n"); + align_free(outptr[i]); + return TEST_FAIL; + } + + void *mappedPtr = clEnqueueMapBuffer( + queue, buffer, CL_TRUE, CL_MAP_READ, 0, + ptrSizes[i] * num_elements, 0, nullptr, nullptr, &err); + if (err != CL_SUCCESS) + { + print_error(err, "clEnqueueMapBuffer failed"); + align_free(outptr[i]); + return TEST_FAIL; + } + + if (memcmp(mappedPtr, outptr[i], ptrSizes[i] * num_elements) != 0) + { + log_error(" %s%d test failed. cl_mem_flags src: %s\n", type, + 1 << i, flag_set_names[src_flag_id]); + total_errors++; + } + else + { + log_info(" %s%d test passed. cl_mem_flags src: %s\n", type, + 1 << i, flag_set_names[src_flag_id]); + } + + err = clEnqueueUnmapMemObject(queue, buffer, mappedPtr, 0, nullptr, + nullptr); + test_error(err, "clEnqueueUnmapMemObject failed"); + + // If we are using the outptr[i] as backing via USE_HOST_PTR we need + // to make sure we are done before freeing. + if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)) + { + err = clFinish(queue); + test_error(err, "clFinish failed"); + } + align_free(outptr[i]); + } + } // cl_mem_flags + + return total_errors > 0 ? TEST_FAIL : TEST_PASS; +} #define DECLARE_LOCK_TEST(type, realType) \ REGISTER_TEST(buffer_map_read_##type) \ @@ -691,6 +792,28 @@ DECLARE_LOCK_TEST(char, cl_char) DECLARE_LOCK_TEST(uchar, cl_uchar) DECLARE_LOCK_TEST(float, cl_float) +#undef DECLARE_LOCK_TEST + +#define DECLARE_LOCK_TEST(type, realType) \ + REGISTER_TEST(immutable_buffer_map_##type) \ + { \ + return test_immutable_buffer_map(device, context, queue, num_elements, \ + sizeof(realType), #type, 5); \ + } + +DECLARE_LOCK_TEST(int, cl_int) +DECLARE_LOCK_TEST(uint, cl_uint) +DECLARE_LOCK_TEST(long, cl_long) +DECLARE_LOCK_TEST(ulong, cl_ulong) +DECLARE_LOCK_TEST(short, cl_short) +DECLARE_LOCK_TEST(ushort, cl_ushort) +DECLARE_LOCK_TEST(char, cl_char) +DECLARE_LOCK_TEST(uchar, cl_uchar) +DECLARE_LOCK_TEST(float, cl_float) + +#undef DECLARE_LOCK_TEST + + REGISTER_TEST(buffer_map_read_struct) { int (*foo)(void *,int); diff --git a/test_conformance/buffers/test_buffer_read.cpp b/test_conformance/buffers/test_buffer_read.cpp index dbf39ab4..814dee45 100644 --- a/test_conformance/buffers/test_buffer_read.cpp +++ b/test_conformance/buffers/test_buffer_read.cpp @@ -666,6 +666,12 @@ static int test_buffer_read(cl_device_id deviceID, cl_context context, for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { + // Skip immutable memory flags + if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } + clMemWrapper buffer; outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment); if ( ! outptr[i] ){ @@ -809,6 +815,12 @@ static int test_buffer_read_async(cl_device_id deviceID, cl_context context, for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { + // Skip immutable memory flags + if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } + clMemWrapper buffer; clEventWrapper event; outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment); @@ -946,6 +958,12 @@ static int test_buffer_read_array_barrier( for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { + // Skip immutable memory flags + if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } + clMemWrapper buffer; clEventWrapper event; outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment); diff --git a/test_conformance/buffers/test_buffer_write.cpp b/test_conformance/buffers/test_buffer_write.cpp index 36dcc963..7c92dfd9 100644 --- a/test_conformance/buffers/test_buffer_write.cpp +++ b/test_conformance/buffers/test_buffer_write.cpp @@ -660,8 +660,18 @@ static int test_buffer_write(cl_device_id deviceID, cl_context context, for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { + // Skip immutable memory flags + if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++) { + // Skip immutable memory flags + if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } clMemWrapper buffers[2]; if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) @@ -834,8 +844,19 @@ REGISTER_TEST(buffer_write_struct) for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { + // Skip immutable memory flags + if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++) { + // Skip immutable memory flags + if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } + clMemWrapper buffers[2]; inptr[i] = (TestStruct *)align_malloc(ptrSizes[i] * num_elements, min_alignment); @@ -996,7 +1017,17 @@ static int test_buffer_write_array_async( ptrSizes[4] = ptrSizes[3] << 1; for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) { + // Skip immutable memory flags + if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) { + // Skip immutable memory flags + if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]); loops = ( loops < 5 ? loops : 5 ); @@ -1974,3 +2005,256 @@ REGISTER_TEST(buffer_write_async_ulong) } // end test_buffer_ulong_write_array_async() + +int immutable_test_buffer_write(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + size_t size, const char *type, int loops, + void *inptr[5], const char *kernelCode[], + const char *kernelName[], + int (*fn)(void *, void *, int), MTdataHolder &d) +{ + REQUIRE_EXTENSION("cl_ext_immutable_memory_objects"); + + void *outptr[5]; + clProgramWrapper program[5]; + clKernelWrapper kernel[5]; + size_t ptrSizes[5]; + size_t global_work_size[3]; + cl_int err; + int i; + int src_flag_id, dst_flag_id; + int total_errors = 0; + + size_t min_alignment = get_min_alignment(context); + + global_work_size[0] = (size_t)num_elements; + + ptrSizes[0] = size; + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + loops = (loops < 5 ? loops : 5); + for (i = 0; i < loops; i++) + { + err = create_single_kernel_helper(context, &program[i], &kernel[i], 1, + &kernelCode[i], kernelName[i]); + if (err) + { + log_error(" Error creating program for %s\n", type); + return TEST_FAIL; + } + + for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) + { + // Testing writing from immutable flags + if (!(flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)) + { + continue; + } + for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++) + { + // Skip immutable memory flags + if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT) + { + continue; + } + + cl_mem_flags src_mem_flags = flag_set[src_flag_id]; + cl_mem_flags dst_mem_flags = flag_set[dst_flag_id]; + clMemWrapper buffers[2]; + + buffers[0] = + clCreateBuffer(context, src_mem_flags, + ptrSizes[i] * num_elements, inptr[i], &err); + + if (nullptr == buffers[0] || CL_SUCCESS != err) + { + align_free(outptr[i]); + print_error(err, " clCreateBuffer failed\n"); + return TEST_FAIL; + } + if (!strcmp(type, "half")) + { + outptr[i] = align_malloc(ptrSizes[i] * (num_elements * 2), + min_alignment); + buffers[1] = clCreateBuffer(context, dst_mem_flags, + ptrSizes[i] * 2 * num_elements, + outptr[i], &err); + } + else + { + outptr[i] = + align_malloc(ptrSizes[i] * num_elements, min_alignment); + if ((dst_mem_flags & CL_MEM_USE_HOST_PTR) + || (dst_mem_flags & CL_MEM_COPY_HOST_PTR)) + buffers[1] = clCreateBuffer(context, dst_mem_flags, + ptrSizes[i] * num_elements, + outptr[i], &err); + else + buffers[1] = clCreateBuffer(context, dst_mem_flags, + ptrSizes[i] * num_elements, + nullptr, &err); + } + if (err) + { + align_free(outptr[i]); + print_error(err, " clCreateBuffer failed\n"); + return TEST_FAIL; + } + + err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), + (void *)&buffers[0]); + err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem), + (void *)&buffers[1]); + if (err != CL_SUCCESS) + { + align_free(outptr[i]); + print_error(err, " clSetKernelArg failed"); + return TEST_FAIL; + } + + err = clEnqueueNDRangeKernel(queue, kernel[i], 1, nullptr, + global_work_size, nullptr, 0, + nullptr, nullptr); + if (err != CL_SUCCESS) + { + print_error(err, " clEnqueueNDRangeKernel failed"); + align_free(outptr[i]); + return TEST_FAIL; + } + + err = clEnqueueReadBuffer(queue, buffers[1], true, 0, + ptrSizes[i] * num_elements, outptr[i], + 0, nullptr, nullptr); + + if (err != CL_SUCCESS) + { + align_free(outptr[i]); + print_error(err, " clEnqueueReadBuffer failed"); + return TEST_FAIL; + } + + if (fn(inptr[i], outptr[i], + (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]))) + { + log_error( + " %s%d test failed. cl_mem_flags src: %s, dst: %s\n", + type, 1 << i, flag_set_names[src_flag_id], + flag_set_names[dst_flag_id]); + total_errors++; + } + else + { + log_info( + " %s%d test passed. cl_mem_flags src: %s, dst: %s\n", + type, 1 << i, flag_set_names[src_flag_id], + flag_set_names[dst_flag_id]); + } + // cleanup + align_free(outptr[i]); + } + } // dst cl_mem_flag + } // src cl_mem_flag + + return total_errors; + +} // end test_buffer_write() + +REGISTER_TEST(write_from_immutable_buffer_to_buffer) +{ + REQUIRE_EXTENSION("cl_ext_immutable_memory_objects"); + + static const char *immutable_buffer_write_int_kernel_code[] = { + R"( + __kernel void test_buffer_write_int(constant int *src, __global int *dst) + { + int tid = get_global_id(0); + + dst[tid] = src[tid]; + })", + + R"( + __kernel void test_buffer_write_int2(constant int2 *src, __global int2 *dst) + { + int tid = get_global_id(0); + + dst[tid] = src[tid]; + })", + + R"( + __kernel void test_buffer_write_int4(constant int4 *src, __global int4 *dst) + { + int tid = get_global_id(0); + + dst[tid] = src[tid]; + })", + + R"( + __kernel void test_buffer_write_int8(constant int8 *src, __global int8 *dst) + { + int tid = get_global_id(0); + + dst[tid] = src[tid]; + })", + + R"( + __kernel void test_buffer_write_int16(constant int16 *src, __global int16 *dst) + { + int tid = get_global_id(0); + + dst[tid] = src[tid]; + })" + }; + + static const char *immutable_int_kernel_name[] = { + "test_buffer_write_int", "test_buffer_write_int2", + "test_buffer_write_int4", "test_buffer_write_int8", + "test_buffer_write_int16" + }; + + if (gTestMap) + { + log_error("Immutable buffers cannot be mapped with CL_MEM_WRITE\n"); + return TEST_SKIPPED_ITSELF; + } + + int *inptr[5]; + size_t ptrSizes[5]; + int i, err; + cl_uint j; + int (*foo)(void *, void *, int); + MTdataHolder d(gRandomSeed); + + size_t min_alignment = get_min_alignment(context); + + foo = verify_write_int; + + ptrSizes[0] = sizeof(cl_int); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + for (i = 0; i < 5; i++) + { + inptr[i] = + (int *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + + for (j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++) + inptr[i][j] = (int)genrand_int32(d); + } + + err = immutable_test_buffer_write(device, context, queue, num_elements, + sizeof(cl_int), "int", 5, (void **)inptr, + immutable_buffer_write_int_kernel_code, + immutable_int_kernel_name, foo, d); + + for (i = 0; i < 5; i++) + { + align_free((void *)inptr[i]); + } + + return err; +} diff --git a/test_conformance/c11_atomics/common.cpp b/test_conformance/c11_atomics/common.cpp index 3be3fbc1..7bb2da76 100644 --- a/test_conformance/c11_atomics/common.cpp +++ b/test_conformance/c11_atomics/common.cpp @@ -194,9 +194,9 @@ template<> cl_int AtomicTypeExtendedInfo::MinValue() {return CL_INT_MIN; template<> cl_uint AtomicTypeExtendedInfo::MinValue() {return 0;} template<> cl_long AtomicTypeExtendedInfo::MinValue() {return CL_LONG_MIN;} template <> cl_ulong AtomicTypeExtendedInfo::MinValue() { return 0; } -template <> cl_half AtomicTypeExtendedInfo::MinValue() +template <> HostHalf AtomicTypeExtendedInfo::MinValue() { - return cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode); + return -CL_HALF_MAX; } template <> cl_float AtomicTypeExtendedInfo::MinValue() { @@ -217,9 +217,9 @@ template <> cl_uint AtomicTypeExtendedInfo::MaxValue() } template<> cl_long AtomicTypeExtendedInfo::MaxValue() {return CL_LONG_MAX;} template<> cl_ulong AtomicTypeExtendedInfo::MaxValue() {return CL_ULONG_MAX;} -template <> cl_half AtomicTypeExtendedInfo::MaxValue() +template <> HostHalf AtomicTypeExtendedInfo::MaxValue() { - return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode); + return CL_HALF_MAX; } template <> cl_float AtomicTypeExtendedInfo::MaxValue() { diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index aee5173d..5d12b2ab 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -22,12 +22,14 @@ #include "host_atomics.h" -#include "CL/cl_half.h" - +#include #include +#include #include #include +#include "CL/cl_half.h" + #define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads) #define MAX_HOST_THREADS GetThreadCount() @@ -76,6 +78,10 @@ extern int extern cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device +extern cl_device_fp_config gDoubleFPConfig; +extern cl_device_fp_config gFloatFPConfig; +extern cl_device_fp_config gHalfFPConfig; + extern cl_half_rounding_mode gHalfRoundingMode; extern bool gFloatAtomicsSupported; extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps; @@ -91,6 +97,37 @@ extern cl_int getSupportedMemoryOrdersAndScopes( cl_device_id device, std::vector &memoryOrders, std::vector &memoryScopes); +union FloatIntUnion { + float f; + uint32_t i; +}; + +template bool is_qnan(const HostDataType &value) +{ + if constexpr (std::is_same_v) + { + FloatIntUnion u; + u.f = value; + if ((u.i & 0x7F800000) != 0x7F800000) return false; + return (u.i & 0x00400000) != 0; + } + else + return std::isnan(value); +} + +template bool is_snan(const HostDataType &value) +{ + if constexpr (std::is_same_v) + { + FloatIntUnion u; + u.f = value; + if ((u.i & 0x7F800000) != 0x7F800000) return false; + return (u.i & 0x00400000) == 0; + } + else + return std::isnan(value); +} + class AtomicTypeInfo { public: TExplicitAtomicType _type; @@ -154,12 +191,12 @@ public: return 0; } CBasicTest(TExplicitAtomicType dataType, bool useSVM) - : CTest(), _maxDeviceThreads(MAX_DEVICE_THREADS), _dataType(dataType), - _useSVM(useSVM), _startValue(255), _localMemory(false), - _declaredInProgram(false), _usedInFunction(false), - _genericAddrSpace(false), _oldValueCheck(true), - _localRefValues(false), _maxGroupSize(0), _passCount(0), - _iterations(gInternalIterations) + : CTest(), _dataType(dataType), _useSVM(useSVM), _startValue(255), + _localMemory(false), _declaredInProgram(false), + _usedInFunction(false), _genericAddrSpace(false), + _oldValueCheck(true), _localRefValues(false), _maxGroupSize(0), + _passCount(0), _iterations(gInternalIterations), + _maxDeviceThreads(MAX_DEVICE_THREADS), _deviceThreads(0) {} virtual ~CBasicTest() { @@ -178,12 +215,15 @@ public: { return false; } + virtual bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) { - return expected != testValues[whichDestValue]; + return expected + != static_cast(testValues[whichDestValue]); } virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) @@ -239,12 +279,12 @@ public: cl_command_queue queue) { int error = 0; - DeclaredInProgram(false); + SetDeclaredInProgram(false); EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue)); if (!UseSVM()) { - DeclaredInProgram(true); + SetDeclaredInProgram(true); EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue)); } @@ -255,13 +295,13 @@ public: cl_command_queue queue) { int error = 0; - if (_maxDeviceThreads > 0 && !UseSVM()) + if (_deviceThreads > 0 && !UseSVM()) { SetLocalMemory(true); EXECUTE_TEST( error, ExecuteForEachDeclarationType(deviceID, context, queue)); } - if (_maxDeviceThreads + MaxHostThreads() > 0) + if (_deviceThreads + MaxHostThreads() > 0) { SetLocalMemory(false); EXECUTE_TEST( @@ -270,7 +310,7 @@ public: return error; } virtual int Execute(cl_device_id deviceID, cl_context context, - cl_command_queue queue, int num_elements) + cl_command_queue queue, int num_elements) override { if (sizeof(HostAtomicType) != DataType().Size(deviceID)) { @@ -310,7 +350,12 @@ public: if (UseSVM()) return 0; _maxDeviceThreads = 0; } - if (_maxDeviceThreads + MaxHostThreads() == 0) return 0; + + _deviceThreads = (num_elements > 0) + ? std::min(cl_uint(num_elements), _maxDeviceThreads) + : _maxDeviceThreads; + + if (_deviceThreads + MaxHostThreads() == 0) return 0; return ExecuteForEachParameterSet(deviceID, context, queue); } virtual void HostFunction(cl_uint tid, cl_uint threadCount, @@ -323,7 +368,7 @@ public: { return AtomicTypeExtendedInfo(_dataType); } - cl_uint _maxDeviceThreads; + virtual cl_uint MaxHostThreads() { if (UseSVM() || gHost) @@ -420,7 +465,7 @@ public: HostDataType StartValue() { return _startValue; } void SetLocalMemory(bool local) { _localMemory = local; } bool LocalMemory() { return _localMemory; } - void DeclaredInProgram(bool declaredInProgram) + void SetDeclaredInProgram(bool declaredInProgram) { _declaredInProgram = declaredInProgram; } @@ -477,6 +522,8 @@ private: cl_uint _currentGroupSize; cl_uint _passCount; const cl_int _iterations; + cl_uint _maxDeviceThreads; + cl_uint _deviceThreads; }; template @@ -702,6 +749,28 @@ public: cl_context context, cl_command_queue queue) { + // Comparator for orders and scopes. + const auto checkValidity = [](TExplicitMemoryOrderType success, + TExplicitMemoryOrderType failure, + TExplicitMemoryScopeType scope) { + // Both memory order arguments must be set (or neither). + if ((success == MEMORY_ORDER_EMPTY || failure == MEMORY_ORDER_EMPTY) + && success != failure) + return false; + + // Memory scope without memory order is disallowed. + if (success == MEMORY_ORDER_EMPTY && scope != MEMORY_SCOPE_EMPTY) + return false; + + // Failure must not be release or acq_rel. + if (failure == MEMORY_ORDER_RELEASE + || failure == MEMORY_ORDER_ACQ_REL) + return false; + + // Failure must not be stronger than success. + return failure <= success; + }; + // repeat test for each reasonable memory order/scope combination std::vector memoryOrder; std::vector memoryScope; @@ -719,16 +788,10 @@ public: { for (unsigned si = 0; si < memoryScope.size(); si++) { - if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY - || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) - && memoryOrder[oi] != memoryOrder[o2i]) - continue; // both memory order arguments must be set (or - // none) - if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY - || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) - && memoryScope[si] != MEMORY_SCOPE_EMPTY) - continue; // memory scope without memory order is not - // allowed + if (!checkValidity(memoryOrder[oi], memoryOrder[o2i], + memoryScope[si])) + continue; + MemoryOrder(memoryOrder[oi]); MemoryOrder2(memoryOrder[o2i]); MemoryScope(memoryScope[si]); @@ -895,12 +958,25 @@ CBasicTest::ProgramHeader(cl_uint maxNumDestItems) + ss.str() + "] = {\n"; ss.str(""); - if (CBasicTest::DataType()._type - == TYPE_ATOMIC_FLOAT) - ss << std::setprecision(10) << _startValue; - else if (CBasicTest::DataType()._type - == TYPE_ATOMIC_HALF) - ss << cl_half_to_float(static_cast(_startValue)); + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) + { + if (std::isinf(_startValue)) + ss << (_startValue < 0 ? "-" : "") << "INFINITY"; + else if (std::isnan(_startValue)) + ss << "0.0 / 0.0"; + else + ss << std::setprecision( + std::numeric_limits::max_digits10) + << _startValue; + } + else if constexpr (std::is_same_v) + { + ss << std::setprecision(std::numeric_limits::max_digits10) + << cl_half_to_float(_startValue); + } else ss << _startValue; @@ -1137,7 +1213,7 @@ int CBasicTest::ExecuteSingleTest( MTdata d; size_t typeSize = DataType().Size(deviceID); - deviceThreadCount = _maxDeviceThreads; + deviceThreadCount = _deviceThreads; hostThreadCount = MaxHostThreads(); threadCount = deviceThreadCount + hostThreadCount; @@ -1202,9 +1278,8 @@ int CBasicTest::ExecuteSingleTest( programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems) + FunctionCode() + KernelCode(numDestItems); programLine = programSource.c_str(); - if (create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &programLine, - "test_atomic_kernel", gOldAPI ? "" : nullptr)) + if (create_single_kernel_helper(context, &program, &kernel, 1, + &programLine, "test_atomic_kernel")) { return -1; } @@ -1289,7 +1364,8 @@ int CBasicTest::ExecuteSingleTest( numDestItems = NumResults(threadCount, deviceID); destItems.resize(numDestItems); - for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue; + for (cl_uint i = 0; i < numDestItems; i++) + destItems[i] = static_cast(_startValue); // Create main buffer with atomic variables (array size dependent on // particular test) @@ -1462,12 +1538,13 @@ int CBasicTest::ExecuteSingleTest( startRefValues.size() ? &startRefValues[0] : 0, i)) break; // no expected value function provided - if (IsTestNotAsExpected(expected, destItems, i)) + if (IsTestNotAsExpected(expected, destItems, startRefValues, i)) { std::stringstream logLine; logLine << "ERROR: Result " << i << " from kernel does not validate! (should be " << expected - << ", was " << destItems[i] << ")\n"; + << ", was " << static_cast(destItems[i]) + << ")\n"; log_error("%s", logLine.str().c_str()); for (i = 0; i < threadCount; i++) { @@ -1534,7 +1611,8 @@ int CBasicTest::ExecuteSingleTest( // clEnqueueNDRangeKernel { /* Re-write the starting value */ - for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue; + for (size_t i = 0; i < numDestItems; i++) + destItems[i] = static_cast(_startValue); refValues[0] = 0; if (deviceThreadCount > 0) { diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index 9a33f26c..4a65d8da 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -18,12 +18,15 @@ #include "harness/testHarness.h" #include + #include "CL/cl_half.h" #ifdef WIN32 #include "Windows.h" #endif +extern cl_half_rounding_mode gHalfRoundingMode; + //flag for test verification (good test should discover non-atomic functions and fail) //#define NON_ATOMIC_FUNCTIONS @@ -37,6 +40,93 @@ enum TExplicitMemoryOrderType MEMORY_ORDER_SEQ_CST }; +// Wrapper class for half-precision +class HostHalf { +public: + // Convert from semantic values + HostHalf(cl_uint value = 0) + : value( + cl_half_from_float(static_cast(value), gHalfRoundingMode)) + {} + HostHalf(int value): HostHalf(static_cast(value)) {} + HostHalf(float value): value(cl_half_from_float(value, gHalfRoundingMode)) + {} + HostHalf(double value): HostHalf(static_cast(value)) {} + + // Convert to semantic values + operator cl_uint() const + { + return static_cast(cl_half_to_float(value)); + } + operator float() const { return cl_half_to_float(value); } + operator double() const + { + return static_cast(cl_half_to_float(value)); + } + + // Construct from bit representation + HostHalf(cl_half value): value(value) {} + + // Get the underlying bit representation + operator cl_half() const { return value; } + + HostHalf operator-() const + { + return HostHalf( + cl_half_from_float(-cl_half_to_float(value), gHalfRoundingMode)); + } + +#define GENERIC_OP(RetType, op) \ + RetType operator op(const HostHalf &other) const \ + { \ + return RetType(cl_half_to_float(value) \ + op cl_half_to_float(other.value)); \ + } + + GENERIC_OP(bool, ==) + GENERIC_OP(bool, !=) + GENERIC_OP(bool, <) + GENERIC_OP(bool, <=) + GENERIC_OP(bool, >) + GENERIC_OP(bool, >=) + GENERIC_OP(HostHalf, +) + GENERIC_OP(HostHalf, -) + GENERIC_OP(HostHalf, *) + GENERIC_OP(HostHalf, /) +#undef GENERIC_OP + +#define INPLACE_OP(op) \ + HostHalf &operator op##=(const HostHalf &other) \ + { \ + value = cl_half_from_float(cl_half_to_float(value) \ + op cl_half_to_float(other.value), \ + gHalfRoundingMode); \ + return *this; \ + } + INPLACE_OP(+) + INPLACE_OP(-) + INPLACE_OP(*) + INPLACE_OP(/) +#undef INPLACE_OP + + friend std::ostream &operator<<(std::ostream &os, const HostHalf &hh) + { + float f = cl_half_to_float(hh.value); + os << f; + return os; + } + +private: + cl_half value; +}; + +namespace std { +inline HostHalf abs(const HostHalf &value) +{ + return value < HostHalf(0) ? -value : value; +} +} // namespace std + // host atomic types (applicable for atomic functions supported on host OS) #ifdef WIN32 #define HOST_ATOMIC_INT unsigned long @@ -73,7 +163,7 @@ enum TExplicitMemoryOrderType #define HOST_UINT cl_uint #define HOST_LONG cl_long #define HOST_ULONG cl_ulong -#define HOST_HALF cl_half +#define HOST_HALF HostHalf #define HOST_FLOAT cl_float #define HOST_DOUBLE cl_double @@ -91,6 +181,18 @@ enum TExplicitMemoryOrderType extern cl_half_rounding_mode gHalfRoundingMode; +template +constexpr bool is_host_atomic_fp_v = + std::disjunction_v, + std::is_same, + std::is_same>; + +template +constexpr bool is_host_fp_v = + std::disjunction_v, + std::is_same, + std::is_same>; + // host atomic functions void host_atomic_thread_fence(TExplicitMemoryOrderType order); @@ -98,24 +200,13 @@ template CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { - if constexpr (std::is_same_v) + if constexpr (is_host_atomic_fp_v) { static std::mutex mx; std::lock_guard lock(mx); CorrespondingType old_value = *a; - *a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)), - gHalfRoundingMode); - return old_value; - } - else if constexpr ( - std::is_same_v< - AtomicType, - HOST_ATOMIC_FLOAT> || std::is_same_v) - { - static std::mutex mx; - std::lock_guard lock(mx); - CorrespondingType old_value = *a; - *a += c; + CorrespondingType new_value = old_value + c; + *a = static_cast(new_value); return old_value; } else @@ -135,21 +226,13 @@ template CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { - if constexpr (std::is_same_v) + if constexpr (is_host_atomic_fp_v) { static std::mutex mx; std::lock_guard lock(mx); CorrespondingType old_value = *a; - *a -= c; - return old_value; - } - else if constexpr (std::is_same_v) - { - static std::mutex mx; - std::lock_guard lock(mx); - CorrespondingType old_value = *a; - *a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)), - gHalfRoundingMode); + CorrespondingType new_value = old_value - c; + *a = static_cast(new_value); return old_value; } else @@ -170,12 +253,14 @@ CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType TExplicitMemoryOrderType order) { #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - if (sizeof(CorrespondingType) == 2) - return InterlockedExchange16(reinterpret_cast(a), c); + if constexpr (sizeof(CorrespondingType) == 2) + return InterlockedExchange16(reinterpret_cast(a), + *reinterpret_cast(&c)); else - return InterlockedExchange(reinterpret_cast(a), c); + return InterlockedExchange(reinterpret_cast(a), + *reinterpret_cast(&c)); #elif defined(__GNUC__) - return __sync_lock_test_and_set(a, c); + return __sync_lock_test_and_set(a, *reinterpret_cast(&c)); #else log_info("Host function not implemented: atomic_exchange\n"); return 0; @@ -192,30 +277,14 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp TExplicitMemoryOrderType order_failure) { CorrespondingType tmp; - if constexpr (std::is_same_v) + if constexpr (is_host_atomic_fp_v) { static std::mutex mtx; std::lock_guard lock(mtx); - tmp = *reinterpret_cast(a); - - if (cl_half_to_float(tmp) == cl_half_to_float(*expected)) - { - *reinterpret_cast(a) = desired; - return true; - } - *expected = tmp; - } - else if constexpr ( - std::is_same_v< - AtomicType, - HOST_ATOMIC_DOUBLE> || std::is_same_v) - { - static std::mutex mtx; - std::lock_guard lock(mtx); - tmp = *reinterpret_cast(a); + tmp = static_cast(*a); if (tmp == *expected) { - *a = desired; + *a = static_cast(desired); return true; } *expected = tmp; @@ -241,8 +310,8 @@ CorrespondingType host_atomic_load(volatile AtomicType *a, TExplicitMemoryOrderType order) { #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - if (sizeof(CorrespondingType) == 2) - auto prev = InterlockedOr16(reinterpret_cast(a), 0); + if constexpr (sizeof(CorrespondingType) == 2) + return InterlockedOr16(reinterpret_cast(a), 0); else return InterlockedExchangeAdd(reinterpret_cast(a), 0); #elif defined(__GNUC__) diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index e2f1888f..ee83dfdb 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -31,11 +31,14 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device +cl_device_fp_config gDoubleFPConfig = 0; +cl_device_fp_config gFloatFPConfig = 0; cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; bool gFloatAtomicsSupported = false; cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0; +cl_device_fp_config gHalfFPConfig = 0; test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); @@ -134,13 +137,17 @@ test_status InitCL(cl_device_id device) { if (is_extension_available(device, "cl_ext_float_atomics")) { gFloatAtomicsSupported = true; - if (is_extension_available(device, "cl_khr_fp64")) { cl_int error = clGetDeviceInfo( device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT, sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr); test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); + + error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, + sizeof(gDoubleFPConfig), &gDoubleFPConfig, + NULL); + test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); } cl_int error = clGetDeviceInfo( @@ -148,6 +155,13 @@ test_status InitCL(cl_device_id device) { sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr); test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); + error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, + sizeof(gFloatFPConfig), &gFloatFPConfig, NULL); + test_error_ret( + error, + "Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)", + TEST_FAIL); + if (is_extension_available(device, "cl_khr_fp16")) { cl_int error = clGetDeviceInfo( @@ -170,6 +184,11 @@ test_status InitCL(cl_device_id device) { log_error("Error while acquiring half rounding mode\n"); return TEST_FAIL; } + + error = + clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG, + sizeof(gHalfFPConfig), &gHalfFPConfig, NULL); + test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); } } diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index f46520ca..d3dd3703 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -94,13 +94,7 @@ public: HostDataType *startRefValues, cl_uint whichDestValue) { - if (CBasicTestMemOrderScope::DataType() - ._type - != TYPE_ATOMIC_HALF) - expected = (HostDataType)whichDestValue; - else - expected = cl_half_from_float(static_cast(whichDestValue), - gHalfRoundingMode); + expected = static_cast(whichDestValue); return true; } }; @@ -401,13 +395,7 @@ public: HostDataType *startRefValues, cl_uint whichDestValue) { - if (CBasicTestMemOrderScope::DataType() - ._type - != TYPE_ATOMIC_HALF) - expected = (HostDataType)whichDestValue; - else - expected = cl_half_from_float(static_cast(whichDestValue), - gHalfRoundingMode); + expected = static_cast(whichDestValue); return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, @@ -417,25 +405,11 @@ public: correct = true; for (cl_uint i = 0; i < threadCount; i++) { - if constexpr (std::is_same_v) + if (refValues[i] != (HostDataType)i) { - HostDataType test = cl_half_from_float(static_cast(i), - gHalfRoundingMode); - if (refValues[i] != test) - { - log_error("Invalid value for thread %u\n", (cl_uint)i); - correct = false; - return true; - } - } - else - { - if (refValues[i] != (HostDataType)i) - { - log_error("Invalid value for thread %u\n", (cl_uint)i); - correct = false; - return true; - } + log_error("Invalid value for thread %u\n", (cl_uint)i); + correct = false; + return true; } } return true; @@ -553,11 +527,7 @@ public: : CBasicTestMemOrderScope(dataType, useSVM) { - if constexpr (std::is_same_v) - StartValue(cl_half_from_float(static_cast(1234), - gHalfRoundingMode)); - else - StartValue(123456); + StartValue(1234); } virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) @@ -619,32 +589,19 @@ public: /* Any repeated value is treated as an error */ std::vector tidFound(threadCount); bool startValueFound = false; - cl_uint startVal = StartValue(); - - if constexpr (std::is_same_v) - startVal = static_cast( - cl_half_to_float(static_cast(StartValue()))); + cl_uint startVal = static_cast(StartValue()); for (cl_uint i = 0; i <= threadCount; i++) { cl_uint value = 0; if (i == threadCount) { - if constexpr (!std::is_same_v) - value = - (cl_uint)finalValues[0]; // additional value from atomic - // variable (last written) - else - value = - cl_half_to_float(static_cast(finalValues[0])); + value = static_cast( + static_cast(finalValues[0])); } else { - if constexpr (!std::is_same_v) - value = (cl_uint)refValues[i]; - else - value = - cl_half_to_float(static_cast(refValues[i])); + value = static_cast(refValues[i]); } if (value == startVal) @@ -1201,85 +1158,24 @@ public: useSVM), min_range(-999.0), max_range(999.0), max_error(0.0) { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { StartValue((HostDataType)0.0); CBasicTestMemOrderScope::OldValueCheck(false); + + // Narrow down range for half to avoid overflow to infinity + if constexpr (std::is_same_v) + { + min_range = -50.0; + max_range = 50.0; + } } } - template float accum_halfs(Iterator begin, Iterator end) - { - cl_half sum = 0; - for (auto it = begin; it != end; ++it) - { - sum = cl_half_from_float(cl_half_to_float(sum) - + cl_half_to_float(*it), - gHalfRoundingMode); - } - return cl_half_to_float(sum); - } bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { - if constexpr (std::is_same_v) - { - if (threadCount > ref_vals.size()) - { - ref_vals.resize(threadCount); - - for (cl_uint i = 0; i < threadCount; i++) - ref_vals[i] = cl_half_from_float( - get_random_float(min_range, max_range, d), - gHalfRoundingMode); - - memcpy(startRefValues, ref_vals.data(), - sizeof(HostDataType) * ref_vals.size()); - - // Estimate highest possible summation error for given set. - std::vector sums; - std::sort(ref_vals.begin(), ref_vals.end(), - [](cl_half a, cl_half b) { - return cl_half_to_float(a) < cl_half_to_float(b); - }); - - sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end())); - sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend())); - - std::sort(ref_vals.begin(), ref_vals.end(), - [](cl_half a, cl_half b) { - return std::abs(cl_half_to_float(a)) - < std::abs(cl_half_to_float(b)); - }); - - float precise = 0.f; - for (auto elem : ref_vals) precise += cl_half_to_float(elem); - sums.push_back(precise); - - sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end())); - sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend())); - - std::sort(sums.begin(), sums.end()); - max_error = std::abs(sums.front() - sums.back()); - - // restore unsorted order - memcpy(ref_vals.data(), startRefValues, - sizeof(HostDataType) * ref_vals.size()); - } - else - { - memcpy(startRefValues, ref_vals.data(), - sizeof(HostDataType) * threadCount); - } - return true; - } - else if constexpr ( - std::is_same_v< - HostDataType, - HOST_FLOAT> || std::is_same_v) + if constexpr (is_host_fp_v) { if (threadCount > ref_vals.size()) { @@ -1299,11 +1195,12 @@ public: std::vector sums; std::sort(ref_vals.begin(), ref_vals.end()); - sums.push_back( - std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f)); + sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(), + static_cast(0.f))); - sums.push_back( - std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); + sums.push_back(std::accumulate(ref_vals.rbegin(), + ref_vals.rend(), + static_cast(0.f))); std::sort(ref_vals.begin(), ref_vals.end(), [](HostDataType a, HostDataType b) { @@ -1318,15 +1215,25 @@ public: sums.push_back(precise); - sums.push_back( - std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f)); + sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(), + static_cast(0.f))); - sums.push_back( - std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); + sums.push_back(std::accumulate(ref_vals.rbegin(), + ref_vals.rend(), + static_cast(0.f))); std::sort(sums.begin(), sums.end()); + assert(std::all_of(sums.begin(), sums.end(), + [](const HostDataType &val) { + return std::isfinite( + static_cast(val)); + }) + && "Infinite summation value detected!"); max_error = std::abs(sums.front() - sums.back()); + log_info("Max allowed error for %u elements: %.10f\n", + threadCount, max_error); + // restore unsorted order memcpy(ref_vals.data(), startRefValues, sizeof(HostDataType) * ref_vals.size()); @@ -1345,10 +1252,7 @@ public: std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { return " atomic_fetch_add" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")oldValues[tid]" @@ -1382,10 +1286,7 @@ public: volatile HostAtomicType *destMemory, HostDataType *oldValues) override { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid], MemoryOrder()); @@ -1411,23 +1312,7 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - if constexpr (std::is_same_v) - { - if (whichDestValue == 0) - { - for (cl_uint i = 0; i < threadCount; i++) - { - expected = cl_half_from_float( - cl_half_to_float(expected) - + cl_half_to_float(startRefValues[i]), - gHalfRoundingMode); - } - } - } - else if constexpr ( - std::is_same_v< - HostDataType, - HOST_DOUBLE> || std::is_same_v) + if constexpr (is_host_fp_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) @@ -1444,37 +1329,28 @@ public: } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) override { - if constexpr (std::is_same_v) + if constexpr (is_host_fp_v) { if (whichDestValue == 0) - return std::abs(cl_half_to_float(expected) - - cl_half_to_float(testValues[whichDestValue])) - > max_error; - } - else if constexpr ( - std::is_same_v< - HostDataType, - HOST_DOUBLE> || std::is_same_v) - { - if (whichDestValue == 0) - return std::abs((HostDataType)expected - - testValues[whichDestValue]) + return std::abs( + static_cast(expected + - static_cast( + testValues[whichDestValue]))) > max_error; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, + startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) override { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { correct = true; for (cl_uint i = 1; i < threadCount; i++) @@ -1497,7 +1373,7 @@ public: int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) { if (LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) @@ -1533,6 +1409,338 @@ public: queue); } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (is_host_fp_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } +}; + +template +class CBasicTestFetchAddSpecialFloats + : public CBasicTestMemOrderScope { + + std::vector ref_vals; + +public: + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::LocalMemory; + using CBasicTestMemOrderScope::DeclaredInProgram; + CBasicTestFetchAddSpecialFloats(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + // StartValue is used as an index divisor in the following test + // logic. It is set to the number of special values, which allows + // threads to be mapped deterministically onto the input data array. + // This enables repeated add operations arranged so that every + // special value is added to every other one (“all-to-all”). + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + StartValue(spec_vals.size()); + CBasicTestMemOrderScope::OldValueCheck(false); + } + else if constexpr (std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode)); + CBasicTestMemOrderScope::OldValueCheck(false); + } + } + + static std::vector &GetSpecialValues() + { + static std::vector special_values; + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) + { + const HostDataType test_value_zero = + static_cast(0.0f); + const HostDataType test_value_minus_zero = + static_cast(-0.0f); + const HostDataType test_value_without_fraction = + static_cast(2.0f); + const HostDataType test_value_with_fraction = + static_cast(2.2f); + + if (special_values.empty()) + { + special_values = { + static_cast(test_value_minus_zero), + static_cast(test_value_zero), + static_cast(test_value_without_fraction), + static_cast(test_value_with_fraction), + std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN(), + std::numeric_limits::signaling_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::signaling_NaN(), + std::numeric_limits::lowest(), + std::numeric_limits::min(), + std::numeric_limits::max(), + }; + + if constexpr (std::is_same_v) + { + if (0 != (CL_FP_DENORM & gDoubleFPConfig)) + { + special_values.push_back( + std::numeric_limits::denorm_min()); + } + } + else if constexpr (std::is_same_v) + { + if (0 != (CL_FP_DENORM & gFloatFPConfig)) + { + special_values.push_back( + std::numeric_limits::denorm_min()); + } + } + } + } + else if constexpr (std::is_same_v) + { + if (special_values.empty()) + { + special_values = { + 0xffff, 0x0000, 0x7c00, /*INFINITY*/ + 0xfc00, /*-INFINITY*/ + 0x8000, /*-0*/ + 0x7bff, /*HALF_MAX*/ + 0x0400, /*HALF_MIN*/ + 0x3c00, /* 1 */ + 0xbc00, /* -1 */ + 0x3555, /*nearest value to 1/3*/ + 0x3bff, /*largest number less than one*/ + 0xc000, /* -2 */ + 0xfbff, /* -HALF_MAX */ + 0x8400, /* -HALF_MIN */ + 0x4248, /* M_PI_H */ + 0xc248, /* -M_PI_H */ + 0xbbff, /* Largest negative fraction */ + }; + + if (0 != (CL_FP_DENORM & gHalfFPConfig)) + { + special_values.push_back(0x0001 /* Smallest denormal */); + special_values.push_back(0x03ff /* Largest denormal */); + } + } + } + return special_values; + } + + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v || std::is_same_v) + { + if (threadCount > ref_vals.size()) + { + ref_vals.assign(threadCount, 0); + auto spec_vals = GetSpecialValues(); + + cl_uint total_cnt = 0; + while (total_cnt < threadCount) + { + cl_uint block_cnt = + std::min((cl_int)(threadCount - total_cnt), + (cl_int)spec_vals.size()); + memcpy(&ref_vals.at(total_cnt), spec_vals.data(), + sizeof(HostDataType) * block_cnt); + total_cnt += block_cnt; + } + } + + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * threadCount); + + return true; + } + return false; + } + std::string ProgramCore() override + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v || std::is_same_v) + { + // The start_value variable (set by StartValue) is used + // as a divisor of the thread index when selecting the operand for + // atomic_fetch_add. This groups threads into blocks corresponding + // to the number of special values and implements an “all-to-all” + // addition pattern. As a result, each destination element is + // updated using different combinations of input values, enabling + // consistent comparison between host and device execution. + + return std::string(DataType().AddSubOperandTypeName()) + + " start_value = atomic_load_explicit(destMemory+tid, " + "memory_order_relaxed, memory_scope_work_group);\n" + " atomic_store_explicit(destMemory+tid, oldValues[tid], " + "memory_order_relaxed, memory_scope_work_group);\n" + " atomic_fetch_add" + + postfix + "(&destMemory[tid], (" + + DataType().AddSubOperandTypeName() + + ")oldValues[tid/(int)start_value]" + memoryOrderScope + + ");\n"; + } + } + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override + { + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v || std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid], + MEMORY_ORDER_SEQ_CST); + host_atomic_fetch_add( + &destMemory[tid], + (HostDataType)oldValues[tid / spec_vals.size()], MemoryOrder()); + } + } + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override + { + expected = StartValue(); + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + expected = startRefValues[whichDestValue] + + startRefValues[whichDestValue / spec_vals.size()]; + } + else if constexpr (std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + expected = cl_half_from_float( + cl_half_to_float(startRefValues[whichDestValue]) + + cl_half_to_float( + startRefValues[whichDestValue / spec_vals.size()]), + gHalfRoundingMode); + } + + return true; + } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + const std::vector &startRefValues, + cl_uint whichDestValue) override + { + if constexpr (std::is_same_v) + { + return static_cast(expected) != testValues[whichDestValue]; + } + else + { + if (std::isnan(testValues[whichDestValue]) && std::isnan(expected)) + return false; + else + return expected != testValues[whichDestValue]; + } + + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + startRefValues, + whichDestValue); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) + == 0) + return 0; + + if (!CBasicTestMemOrderScope::LocalMemory() + && CBasicTestMemOrderScope::DeclaredInProgram()) + { + if ((gDoubleFPConfig & CL_FP_INF_NAN) == 0) return 0; + } + } + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; + + if (!CBasicTestMemOrderScope::LocalMemory() + && CBasicTestMemOrderScope::DeclaredInProgram()) + { + if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0; + } + } + else if constexpr (std::is_same_v) + { + if (DeclaredInProgram()) return 0; // skip test - not applicable + + if (LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; + + if (!CBasicTestMemOrderScope::LocalMemory() + && CBasicTestMemOrderScope::DeclaredInProgram()) + { + if ((gHalfFPConfig & CL_FP_INF_NAN) == 0) return 0; + } + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { if constexpr ( std::is_same_v< @@ -1572,6 +1780,24 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + CBasicTestFetchAddSpecialFloats + test_spec_double(TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST( + error, + test_spec_double.Execute(deviceID, context, queue, num_elements)); + + CBasicTestFetchAddSpecialFloats + test_spec_float(TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, + test_spec_float.Execute(deviceID, context, queue, num_elements)); + + CBasicTestFetchAddSpecialFloats + test_spec_half(TYPE_ATOMIC_HALF, useSVM); + EXECUTE_TEST( + error, + test_spec_half.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_half( TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST(error, @@ -1647,6 +1873,23 @@ REGISTER_TEST(svm_atomic_fetch_add) true); } +template double kahan_sub(const std::vector &nums) +{ + return 0.0; +} +template <> double kahan_sub(const std::vector &nums) +{ + double sum = 0.0; + double compensation = 0.0; + for (double num : nums) + { + double y = -num - compensation; + double t = sum + y; + compensation = (t - sum) - y; + sum = t; + } + return sum; +} template class CBasicTestFetchSub : public CBasicTestMemOrderScope { @@ -1668,14 +1911,18 @@ public: useSVM), min_range(-999.0), max_range(999.0), max_error(0.0) { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_FLOAT> || std::is_same_v) + if constexpr (is_host_fp_v) { StartValue(0); CBasicTestMemOrderScope::OldValueCheck(false); + + // Narrow down range for half to avoid overflow to infinity + if constexpr (std::is_same_v) + { + min_range = -50.0; + max_range = 50.0; + } } } template @@ -1685,28 +1932,18 @@ public: for (auto it = begin; it != end; ++it) res = res - *it; return res; } - template - float subtract_halfs(Iterator begin, Iterator end) - { - cl_half res = 0; - for (auto it = begin; it != end; ++it) - { - res = cl_half_from_float(cl_half_to_float(res) - - cl_half_to_float(*it), - gHalfRoundingMode); - } - return cl_half_to_float(res); - } bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { - if constexpr (std::is_same_v) + if constexpr (is_host_fp_v) { if (threadCount > ref_vals.size()) { ref_vals.resize(threadCount); + for (cl_uint i = 0; i < threadCount; i++) - ref_vals[i] = get_random_float(min_range, max_range, d); + ref_vals[i] = (HostDataType)get_random_double(min_range, + max_range, d); memcpy(startRefValues, ref_vals.data(), sizeof(HostDataType) * ref_vals.size()); @@ -1717,72 +1954,33 @@ public: sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend())); - std::sort( - ref_vals.begin(), ref_vals.end(), - [](float a, float b) { return std::abs(a) < std::abs(b); }); + std::sort(ref_vals.begin(), ref_vals.end(), + [](HostDataType a, HostDataType b) { + return std::abs(a) < std::abs(b); + }); double precise = 0.0; - for (auto elem : ref_vals) precise += double(elem); + if constexpr (std::is_same_v) + precise = kahan_sub(ref_vals); + else + for (auto elem : ref_vals) precise += double(elem); sums.push_back(precise); + sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend())); std::sort(sums.begin(), sums.end()); - max_error = - std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back()); - - // restore unsorted order - memcpy(ref_vals.data(), startRefValues, - sizeof(HostDataType) * ref_vals.size()); - } - else - { - memcpy(startRefValues, ref_vals.data(), - sizeof(HostDataType) * threadCount); - } - return true; - } - if constexpr (std::is_same_v) - { - if (threadCount > ref_vals.size()) - { - ref_vals.resize(threadCount); - for (cl_uint i = 0; i < threadCount; i++) - ref_vals[i] = cl_half_from_float( - get_random_float(min_range, max_range, d), - gHalfRoundingMode); - - memcpy(startRefValues, ref_vals.data(), - sizeof(HostDataType) * ref_vals.size()); - - // Estimate highest possible summation error for given set. - std::vector sums; - std::sort(ref_vals.begin(), ref_vals.end(), - [](cl_half a, cl_half b) { - return cl_half_to_float(a) < cl_half_to_float(b); - }); - - sums.push_back( - subtract_halfs(ref_vals.begin(), ref_vals.end())); - sums.push_back( - subtract_halfs(ref_vals.rbegin(), ref_vals.rend())); - - std::sort(ref_vals.begin(), ref_vals.end(), - [](cl_half a, cl_half b) { - return std::abs(cl_half_to_float(a)) - < std::abs(cl_half_to_float(b)); - }); - - float precise = 0.f; - for (auto elem : ref_vals) precise -= cl_half_to_float(elem); - sums.push_back(precise); - sums.push_back( - subtract_halfs(ref_vals.begin(), ref_vals.end())); - sums.push_back( - subtract_halfs(ref_vals.rbegin(), ref_vals.rend())); - - std::sort(sums.begin(), sums.end()); + assert(std::all_of(sums.begin(), sums.end(), + [](const HostDataType &val) { + return std::isfinite( + static_cast(val)); + }) + && "Infinite subtraction value detected!"); max_error = std::abs(sums.front() - sums.back()); + + log_info("Max allowed error for %u elements: %.10f\n", + threadCount, max_error); + // restore unsorted order memcpy(ref_vals.data(), startRefValues, sizeof(HostDataType) * ref_vals.size()); @@ -1801,10 +1999,7 @@ public: std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v) + if constexpr (is_host_fp_v) { return " atomic_fetch_sub" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")oldValues[tid]" @@ -1826,10 +2021,7 @@ public: volatile HostAtomicType *destMemory, HostDataType *oldValues) override { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v) + if constexpr (is_host_fp_v) { host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid], MemoryOrder()); @@ -1851,25 +2043,12 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - if constexpr (std::is_same_v) + if constexpr (is_host_fp_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) expected -= startRefValues[i]; } - else if constexpr (std::is_same_v) - { - if (whichDestValue == 0) - { - for (cl_uint i = 0; i < threadCount; i++) - { - expected = cl_half_from_float( - cl_half_to_float(expected) - - cl_half_to_float(startRefValues[i]), - gHalfRoundingMode); - } - } - } else { for (cl_uint i = 0; i < threadCount; i++) @@ -1880,25 +2059,22 @@ public: } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) override { - if constexpr (std::is_same_v) + if constexpr (is_host_fp_v) { if (whichDestValue == 0) - return std::abs((HOST_ATOMIC_FLOAT)expected - - testValues[whichDestValue]) - > max_error; - } - else if constexpr (std::is_same_v) - { - if (whichDestValue == 0) - return std::abs(cl_half_to_float(expected) - - cl_half_to_float(testValues[whichDestValue])) + return std::abs( + static_cast(expected + - static_cast( + testValues[whichDestValue]))) > max_error; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, + startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, @@ -1927,7 +2103,18 @@ public: int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) + == 0) + return 0; + } + else if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) @@ -1953,10 +2140,7 @@ public: } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v) + if constexpr (is_host_fp_v) { return threadCount; } @@ -1991,6 +2175,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + CBasicTestFetchSub test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST( + error, test_double.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST( @@ -2872,10 +3061,7 @@ public: min_range(-999.0), max_range(999.0) { StartValue(DataType().MaxValue()); - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v) + if constexpr (is_host_fp_v) { CBasicTestMemOrderScope::OldValueCheck(false); @@ -2885,10 +3071,7 @@ public: { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { return " atomic_fetch_min" + postfix + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" @@ -2907,10 +3090,7 @@ public: volatile HostAtomicType *destMemory, HostDataType *oldValues) override { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder()); @@ -2965,19 +3145,7 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - if constexpr (std::is_same_v) - { - if (whichDestValue == 0) - { - for (cl_uint i = 0; i < threadCount; i++) - { - if (cl_half_to_float(startRefValues[i]) - < cl_half_to_float(expected)) - expected = startRefValues[i]; - } - } - } - else if constexpr (std::is_same_v) + if constexpr (is_host_fp_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) @@ -2995,36 +3163,36 @@ public: } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) override { - if (std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { if (whichDestValue == 0) return CBasicTestMemOrderScope:: - IsTestNotAsExpected(expected, testValues, whichDestValue); + IsTestNotAsExpected(expected, testValues, startRefValues, + whichDestValue); return false; // ignore all but 0 which stores final result } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, + startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) override { - if (std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same::value) + if constexpr (is_host_fp_v) { correct = true; for (cl_uint i = 1; i < threadCount; i++) { if (refValues[i] != StartValue()) { - log_error("Thread %d found %d mismatch(es)\n", i, - (cl_uint)refValues[i]); + log_error( + "Thread %d found %lf mismatch(es), start value=%lf\n", + i, (double)refValues[i], (double)StartValue()); correct = false; } } @@ -3081,10 +3249,252 @@ public: } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } +}; + +template +class CBasicTestFetchMinSpecialFloats + : public CBasicTestMemOrderScope { + + std::vector ref_vals; + +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::LocalMemory; + CBasicTestFetchMinSpecialFloats(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + // StartValue is used as an index divisor in the following test + // logic. It is set to the number of special values, which allows + // threads to be mapped deterministically onto the input data array. + // This enables repeated add operations arranged so that every + // special value is added to every other one (“all-to-all”). + + if constexpr (std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + StartValue(spec_vals.size()); + CBasicTestMemOrderScope::OldValueCheck(false); + } + } + + static std::vector &GetSpecialValues() + { + static std::vector special_values; + if constexpr (std::is_same_v) + { + const HostDataType test_value_zero = + static_cast(0.0f); + const HostDataType test_value_minus_zero = + static_cast(-0.0f); + const HostDataType test_value_without_fraction = + static_cast(2.0f); + const HostDataType test_value_with_fraction = + static_cast(2.2f); + + if (special_values.empty()) + { + special_values = { + static_cast(test_value_minus_zero), + static_cast(test_value_zero), + static_cast(test_value_without_fraction), + static_cast(test_value_with_fraction), + std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN(), + std::numeric_limits::signaling_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::signaling_NaN(), + std::numeric_limits::lowest(), + std::numeric_limits::min(), + std::numeric_limits::max(), + }; + + if (0 != (CL_FP_DENORM & gFloatFPConfig)) + { + special_values.push_back( + std::numeric_limits::denorm_min()); + } + } + } + + return special_values; + } + + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + if (threadCount > ref_vals.size()) + { + ref_vals.assign(threadCount, 0); + auto spec_vals = GetSpecialValues(); + + cl_uint total_cnt = 0; + while (total_cnt < threadCount) + { + cl_uint block_cnt = + std::min((cl_int)(threadCount - total_cnt), + (cl_int)spec_vals.size()); + memcpy(&ref_vals.at(total_cnt), spec_vals.data(), + sizeof(HostDataType) * block_cnt); + total_cnt += block_cnt; + } + } + + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * threadCount); + + return true; + } + return false; + } + std::string ProgramCore() override + { + // The start_value variable (set by StartValue) is used + // as a divisor of the thread index when selecting the operand for + // atomic_fetch_add. This groups threads into blocks corresponding + // to the number of special values and implements an “all-to-all” + // addition pattern. As a result, each destination element is + // updated using different combinations of input values, enabling + // consistent comparison between host and device execution. + + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(DataType().AddSubOperandTypeName()) + + " start_value = atomic_load_explicit(destMemory+tid, " + "memory_order_relaxed, memory_scope_work_group);\n" + " atomic_store_explicit(destMemory+tid, oldValues[tid], " + "memory_order_relaxed, memory_scope_work_group);\n" + " atomic_fetch_min" + + postfix + "(&destMemory[tid], (" + + DataType().AddSubOperandTypeName() + + ")oldValues[tid/(int)start_value]" + memoryOrderScope + ");\n"; + } + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override + { + auto spec_vals = GetSpecialValues(); + host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid], + MEMORY_ORDER_SEQ_CST); + host_atomic_fetch_min(&destMemory[tid], + (HostDataType)oldValues[tid / spec_vals.size()], + MemoryOrder()); + } + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override + { + expected = StartValue(); + if constexpr (std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + expected = + std::min(startRefValues[whichDestValue], + startRefValues[whichDestValue / spec_vals.size()]); + } + return true; + } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + const std::vector &startRefValues, + cl_uint whichDestValue) override + { + if (testValues[whichDestValue] != expected) + { + auto spec_vals = GetSpecialValues(); + // special cases + // min(-0, +0) = min(+0, -0) = +0 or -0, + if (((startRefValues[whichDestValue] == -0.f) + && (startRefValues[whichDestValue / spec_vals.size()] == 0.f)) + || ((startRefValues[whichDestValue] == 0.f) + && (startRefValues[whichDestValue / spec_vals.size()] + == -0.f))) + return false; + else if (is_qnan(startRefValues[whichDestValue / spec_vals.size()]) + || is_qnan(startRefValues[whichDestValue])) + { + // min(x, qNaN) = min(qNaN, x) = x, + // min(qNaN, qNaN) = qNaN, + if (is_qnan(startRefValues[whichDestValue / spec_vals.size()]) + && is_qnan(startRefValues[whichDestValue])) + return !is_qnan(testValues[whichDestValue]); + else if (is_qnan( + startRefValues[whichDestValue / spec_vals.size()])) + return !std::isnan(testValues[whichDestValue]) + && testValues[whichDestValue] + != startRefValues[whichDestValue]; // NaN != NaN always + // true + else + return !std::isnan(testValues[whichDestValue]) + && testValues[whichDestValue] + != startRefValues[whichDestValue / spec_vals.size()]; + } + else if (is_snan(startRefValues[whichDestValue / spec_vals.size()]) + || is_snan(startRefValues[whichDestValue])) + { + // min(x, sNaN) = min(sNaN, x) = NaN or x, and + // min(NaN, sNaN) = min(sNaN, NaN) = NaN + if (std::isnan(testValues[whichDestValue]) + || testValues[whichDestValue] + == startRefValues[whichDestValue] + || testValues[whichDestValue] + == startRefValues[whichDestValue / spec_vals.size()]) + return false; + } + } + + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + startRefValues, + whichDestValue); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; + + if (!CBasicTestMemOrderScope::LocalMemory() + && CBasicTestMemOrderScope::DeclaredInProgram()) + { + if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0; + } + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) { return threadCount; } @@ -3119,6 +3529,12 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + CBasicTestFetchMinSpecialFloats + test_spec_float(TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, + test_spec_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST( @@ -3212,21 +3628,11 @@ public: useSVM), min_range(-999.0), max_range(999.0) { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v) + StartValue(DataType().MinValue()); + if constexpr (is_host_fp_v) { CBasicTestMemOrderScope::OldValueCheck(false); - if constexpr (std::is_same_v) - StartValue(cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode)); - else - StartValue(-DataType().MaxValue()); - } - else - { - StartValue(DataType().MinValue()); } } std::string ProgramCore() override @@ -3255,10 +3661,7 @@ public: volatile HostAtomicType *destMemory, HostDataType *oldValues) override { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder()); @@ -3274,23 +3677,12 @@ public: bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { - if constexpr (std::is_same_v) + if constexpr (is_host_fp_v) { for (cl_uint i = 0; i < threadCount; i++) { - startRefValues[i] = cl_half_from_float( - get_random_float(min_range, max_range, d), - gHalfRoundingMode); - } - } - else if constexpr ( - std::is_same_v< - HostDataType, - HOST_FLOAT> || std::is_same_v) - { - for (cl_uint i = 0; i < threadCount; i++) - { - startRefValues[i] = get_random_float(min_range, max_range, d); + startRefValues[i] = static_cast( + get_random_float(min_range, max_range, d)); } } else @@ -3313,19 +3705,7 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - if constexpr (std::is_same_v) - { - if (whichDestValue == 0) - { - for (cl_uint i = 0; i < threadCount; i++) - { - if (cl_half_to_float(startRefValues[i]) - > cl_half_to_float(expected)) - expected = startRefValues[i]; - } - } - } - else if constexpr (std::is_same_v) + if constexpr (is_host_fp_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) @@ -3343,36 +3723,36 @@ public: } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) override { - if (std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { if (whichDestValue == 0) return CBasicTestMemOrderScope:: - IsTestNotAsExpected(expected, testValues, whichDestValue); + IsTestNotAsExpected(expected, testValues, startRefValues, + whichDestValue); return false; // ignore all but 0 which stores final result } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, + startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) override { - if (std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { correct = true; for (cl_uint i = 1; i < threadCount; i++) { if (refValues[i] != StartValue()) { - log_error("Thread %d found %d mismatch(es)\n", i, - (cl_uint)refValues[i]); + log_error( + "Thread %d found %lf mismatch(es), start value=%lf\n", + i, (double)refValues[i], (double)StartValue()); correct = false; } } @@ -3429,10 +3809,7 @@ public: } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { - if constexpr ( - std::is_same_v< - HostDataType, - HOST_HALF> || std::is_same_v || std::is_same_v) + if constexpr (is_host_fp_v) { return threadCount; } diff --git a/test_conformance/common/directx_wrapper/directx_wrapper.hpp b/test_conformance/common/directx_wrapper/directx_wrapper.hpp index dec85b78..fb758880 100644 --- a/test_conformance/common/directx_wrapper/directx_wrapper.hpp +++ b/test_conformance/common/directx_wrapper/directx_wrapper.hpp @@ -26,9 +26,9 @@ class DirectXWrapper { public: DirectXWrapper(); - ID3D12Device* getDXDevice() const; - ID3D12CommandQueue* getDXCommandQueue() const; - ID3D12CommandAllocator* getDXCommandAllocator() const; + [[nodiscard]] ID3D12Device* getDXDevice() const; + [[nodiscard]] ID3D12CommandQueue* getDXCommandQueue() const; + [[nodiscard]] ID3D12CommandAllocator* getDXCommandAllocator() const; protected: ComPtr dx_device = nullptr; @@ -39,7 +39,7 @@ protected: class DirectXFenceWrapper { public: DirectXFenceWrapper(ID3D12Device* dx_device); - ID3D12Fence* operator*() const { return dx_fence.Get(); } + [[nodiscard]] ID3D12Fence* get() const { return dx_fence.Get(); } private: ComPtr dx_fence = nullptr; diff --git a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp index a474102d..b1f1b8c9 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp @@ -104,7 +104,8 @@ VK_FUNC_DECL(vkGetImageSubresourceLayout) \ VK_FUNC_DECL(vkCreateDebugUtilsMessengerEXT) \ VK_FUNC_DECL(vkDestroyDebugUtilsMessengerEXT) \ - VK_FUNC_DECL(vkGetPhysicalDeviceExternalBufferProperties) + VK_FUNC_DECL(vkGetPhysicalDeviceExternalBufferProperties) \ + VK_FUNC_DECL(vkGetPhysicalDeviceFeatures2) #define VK_WINDOWS_FUNC_LIST \ VK_FUNC_DECL(vkGetMemoryWin32HandleKHR) \ VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR) \ @@ -209,5 +210,6 @@ #define vkDestroyDebugUtilsMessengerEXT _vkDestroyDebugUtilsMessengerEXT #define vkGetPhysicalDeviceExternalBufferProperties \ _vkGetPhysicalDeviceExternalBufferProperties +#define vkGetPhysicalDeviceFeatures2 _vkGetPhysicalDeviceFeatures2 #endif //_vulkan_api_list_hpp_ diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp index 7254742d..08506d1c 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp @@ -147,6 +147,7 @@ VulkanInstance::VulkanInstance(bool useValidationLayers) // return WAIVED; } + VK_GET_NULL_INSTANCE_PROC_ADDR(vkGetPhysicalDeviceFeatures2); VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceVersion); VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceLayerProperties); VK_GET_NULL_INSTANCE_PROC_ADDR(vkCreateInstance); @@ -612,7 +613,8 @@ VulkanDevice::VulkanDevice(const VulkanDevice &device) VulkanDevice::VulkanDevice( const VulkanPhysicalDevice &physicalDevice, - const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap) + const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap, + bool useShaderInt8) : m_physicalDevice(physicalDevice), m_vkDevice(NULL) { uint32_t maxQueueCount = 0; @@ -676,7 +678,55 @@ VulkanDevice::VulkanDevice( enabledExtensionNameList.data(); vkDeviceCreateInfo.pEnabledFeatures = NULL; - vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice); + if (useShaderInt8) + { + VkPhysicalDeviceShaderFloat16Int8Features int8Features{}; + int8Features.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES; + + VkPhysicalDevice8BitStorageFeatures storage8Features{}; + storage8Features.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; + + int8Features.pNext = &storage8Features; + + VkPhysicalDeviceFeatures2 features2{}; + features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + features2.pNext = &int8Features; + + vkGetPhysicalDeviceFeatures2(physicalDevice, &features2); + + if (!int8Features.shaderInt8 + || !storage8Features.storageBuffer8BitAccess) + { + throw std::runtime_error("shaderInt8 not supported!\n"); + } + + VkPhysicalDevice8BitStorageFeatures storage8Enable{}; + storage8Enable.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; + storage8Enable.storageBuffer8BitAccess = VK_TRUE; + + VkPhysicalDeviceShaderFloat16Int8Features int8Enable{}; + int8Enable.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES; + int8Enable.shaderInt8 = VK_TRUE; + int8Enable.pNext = &storage8Enable; + + vkDeviceCreateInfo.pNext = &int8Enable; + + enabledExtensionNameList.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); + vkDeviceCreateInfo.ppEnabledExtensionNames = + enabledExtensionNameList.data(); + vkDeviceCreateInfo.enabledExtensionCount = + (uint32_t)enabledExtensionNameList.size(); + + vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice); + } + else + { + vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice); + } for (uint32_t qfIdx = 0; qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size(); @@ -1071,7 +1121,8 @@ VulkanComputePipeline::VulkanComputePipeline( VulkanComputePipeline::VulkanComputePipeline( const VulkanDevice &device, const VulkanPipelineLayout &pipelineLayout, - const VulkanShaderModule &shaderModule, const std::string &entryFuncName) + const VulkanShaderModule &shaderModule, const std::string &entryFuncName, + const VkSpecializationInfo *spec) : VulkanPipeline(device) { VkPipelineShaderStageCreateInfo vkPipelineShaderStageCreateInfo = {}; @@ -1084,6 +1135,8 @@ VulkanComputePipeline::VulkanComputePipeline( vkPipelineShaderStageCreateInfo.pName = entryFuncName.c_str(); vkPipelineShaderStageCreateInfo.pSpecializationInfo = NULL; + if (spec) vkPipelineShaderStageCreateInfo.pSpecializationInfo = spec; + VkComputePipelineCreateInfo vkComputePipelineCreateInfo = {}; vkComputePipelineCreateInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp index 9cb031f5..25e1d409 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp @@ -148,7 +148,8 @@ public: VulkanDevice( const VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(), const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap = - getDefaultVulkanQueueFamilyToQueueCountMap()); + getDefaultVulkanQueueFamilyToQueueCountMap(), + bool useShaderInt8 = false); virtual ~VulkanDevice(); const VulkanPhysicalDevice &getPhysicalDevice() const; VulkanQueue & @@ -296,7 +297,8 @@ public: VulkanComputePipeline(const VulkanDevice &device, const VulkanPipelineLayout &pipelineLayout, const VulkanShaderModule &shaderModule, - const std::string &entryFuncName = "main"); + const std::string &entryFuncName = "main", + const VkSpecializationInfo *spec = nullptr); virtual ~VulkanComputePipeline(); VulkanPipelineBindPoint getPipelineBindPoint() const; }; diff --git a/test_conformance/compiler/test_build_helpers.cpp b/test_conformance/compiler/test_build_helpers.cpp index e8ef3ee8..46a54efd 100644 --- a/test_conformance/compiler/test_build_helpers.cpp +++ b/test_conformance/compiler/test_build_helpers.cpp @@ -16,6 +16,7 @@ #include "testBase.h" #include "harness/testHarness.h" #include "harness/parseParameters.h" +#include "harness/stringHelpers.h" #include #include @@ -1009,3 +1010,113 @@ REGISTER_TEST(get_program_build_info) return 0; } + +cl_int test_kernel_name_len(cl_context context, cl_device_id device, + const cl_uint length) +{ + cl_int error = CL_SUCCESS; + + std::string buf = { "abcdefghijklmnopqrstuvwxyz" }; + std::string name; + name.reserve(length); + + for (cl_uint i = 0; i < length; ++i) name += buf[i % buf.size()]; + + const char *sample_name_size_test_kernel = R"( + __kernel void %s(int src, __global int *dst) + { + dst[0]=src; + } + )"; + std::string program_source = + str_sprintf(std::string(sample_name_size_test_kernel), name.c_str()); + const char *ptr = program_source.c_str(); + + { + clProgramWrapper program; + clKernelWrapper kernel; + + error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + name.c_str()); + if (error != CL_SUCCESS) + { + log_error("ERROR: Unable to create program with length of " + "kernel name " + "%d : %s! (%s from %s:%d)\n", + length, name.c_str(), IGetErrorString(error), __FILE__, + __LINE__); + return TEST_FAIL; + } + + // query kernel name + size_t kernel_name_size = 0; + error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 0, nullptr, + &kernel_name_size); + test_error(error, "clGetKernelInfo (size) failed"); + + std::vector kernel_name(kernel_name_size); + error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, + kernel_name_size, kernel_name.data(), nullptr); + test_error(error, "clGetKernelInfo (name) failed"); + + if (name != std::string(kernel_name.data())) + { + log_error("Kernel name mismatch! expected=%s got=%s\n", + name.c_str(), kernel_name.data()); + return TEST_FAIL; + } + } + + if (gCompilationMode == kOnline) + { + clProgramWrapper programObj = + clCreateProgramWithSource(context, 1, &ptr, nullptr, &error); + test_error(error, "clCreateProgramWithSource failed (compile)"); + + error = clCompileProgram(programObj, 0, nullptr, nullptr, 0, nullptr, + nullptr, nullptr, nullptr); + if (error != CL_SUCCESS) + { + log_error("ERROR: Unable to compile program with length of " + "kernel name " + "%d : %s! (%s from %s:%d)\n", + length, name.c_str(), IGetErrorString(error), __FILE__, + __LINE__); + return TEST_FAIL; + } + + clProgramWrapper linkedProgram = + clLinkProgram(context, 0, nullptr, nullptr, 1, &programObj, nullptr, + nullptr, &error); + if (error != CL_SUCCESS) + { + log_error("ERROR: Unable to link program with length of " + "kernel name " + "%d : %s! (%s from %s:%d)\n", + length, name.c_str(), IGetErrorString(error), __FILE__, + __LINE__); + return TEST_FAIL; + } + + clKernelWrapper kernel = + clCreateKernel(linkedProgram, name.c_str(), &error); + test_error(error, "clCreateKernel after link failed"); + } + + return TEST_PASS; +} + +REGISTER_TEST(kernel_name_size) +{ + for (cl_uint len = 32; len <= 2048; len *= 2) + { + cl_int status = test_kernel_name_len(context, device, len); + if (status == TEST_FAIL) + { + log_error("ERROR: test_kernel_name_len failed with length %d\n", + len); + return TEST_FAIL; + } + } + return TEST_PASS; +} diff --git a/test_conformance/compiler/test_build_options.cpp b/test_conformance/compiler/test_build_options.cpp index daba1b0e..15348e06 100644 --- a/test_conformance/compiler/test_build_options.cpp +++ b/test_conformance/compiler/test_build_options.cpp @@ -14,7 +14,11 @@ // limitations under the License. // #include "testBase.h" +#include "harness/kernelHelpers.h" #include "harness/os_helpers.h" +#include "harness/testHarness.h" + +#include const char *preprocessor_test_kernel[] = { "__kernel void sample_test(__global int *dst)\n" @@ -42,26 +46,22 @@ const char *include_test_kernel[] = { "\n" "}\n" }; -const char *options_test_kernel[] = { - "__kernel void sample_test(__global float *src, __global int *dst)\n" - "{\n" - " size_t tid = get_global_id(0);\n" - " dst[tid] = (int)src[tid];\n" - "}\n" -}; +const char *options_test_kernel[] = { "__kernel void sample_test() {}\n" }; -const char *optimization_options[] = { - "-cl-single-precision-constant", - "-cl-denorms-are-zero", - "-cl-opt-disable", - "-cl-mad-enable", - "-cl-no-signed-zeros", - "-cl-unsafe-math-optimizations", - "-cl-finite-math-only", - "-cl-fast-relaxed-math", - "-w", - "-Werror", - }; +std::array optimization_options{ + std::pair{ "-cl-single-precision-constant", Version(1, 0) }, + std::pair{ "-cl-denorms-are-zero", Version(1, 0) }, + std::pair{ "-cl-opt-disable", Version(1, 0) }, + std::pair{ "-cl-mad-enable", Version(1, 0) }, + std::pair{ "-cl-no-signed-zeros", Version(1, 0) }, + std::pair{ "-cl-unsafe-math-optimizations", Version(1, 0) }, + std::pair{ "-cl-finite-math-only", Version(1, 0) }, + std::pair{ "-cl-fast-relaxed-math", Version(1, 0) }, + std::pair{ "-w", Version(1, 0) }, + std::pair{ "-Werror", Version(1, 0) }, + std::pair{ "-cl-uniform-work-group-size", Version(2, 0) }, + std::pair{ "-cl-no-subgroup-ifp", Version(2, 1) }, +}; cl_int get_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_int *outValue ) { @@ -93,31 +93,44 @@ REGISTER_TEST(options_build_optimizations) int error; cl_build_status status; - for(size_t i = 0; i < sizeof(optimization_options) / (sizeof(char*)); i++) { + Version version = get_device_cl_version(device); - clProgramWrapper program; - error = create_single_kernel_helper_create_program(context, &program, 1, options_test_kernel, optimization_options[i]); - if( program == NULL || error != CL_SUCCESS ) + for (const auto &optimization_option : optimization_options) + { + if (version < optimization_option.second) { - log_error( "ERROR: Unable to create reference program!\n" ); + continue; + } + + auto build_options = std::string("-cl-std=CL") + + get_max_OpenCL_C_for_context(context).to_string() + " " + + optimization_option.first; + const char *option = build_options.c_str(); + clProgramWrapper program; + error = create_single_kernel_helper_create_program( + context, &program, 1, options_test_kernel, option); + if (program == NULL || error != CL_SUCCESS) + { + log_error("ERROR: Unable to create reference program!\n"); return -1; } /* Build with the macro defined */ - log_info("Testing optimization option '%s'\n", optimization_options[i]); - error = clBuildProgram(program, 1, &device, optimization_options[i], - NULL, NULL); - test_error( error, "Test program did not properly build" ); + log_info("Testing optimization option '%s'\n", option); + error = clBuildProgram(program, 1, &device, option, NULL, NULL); + test_error(error, "Test program did not properly build"); error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL); - test_error( error, "Unable to get program build status" ); + test_error(error, "Unable to get program build status"); - if( (int)status != CL_BUILD_SUCCESS ) + if ((int)status != CL_BUILD_SUCCESS) { - log_info("Building with optimization option '%s' failed to compile!\n", optimization_options[i]); - print_error( error, "Failed to build with optimization defined") - return -1; + log_info( + "Building with optimization option '%s' failed to compile!\n", + option); + print_error(error, + "Failed to build with optimization defined") return -1; } } return 0; @@ -415,3 +428,53 @@ REGISTER_TEST(options_denorm_cache) return 0; } + +REGISTER_TEST(options_uniform_work_group_size) +{ + if (get_device_cl_version(device) < Version(2, 0)) + { + return TEST_SKIPPED_ITSELF; + } + std::string build_options = "-cl-std=CL" + + get_max_OpenCL_C_for_context(context).to_string() + + " -cl-uniform-work-group-size"; + const char *options = build_options.c_str(); + clProgramWrapper program; + int error = create_single_kernel_helper_create_program( + context, &program, 1, options_test_kernel, options); + if (program == NULL || error != CL_SUCCESS) + { + log_error("Error: Unable to create reference program!\n"); + return TEST_FAIL; + } + error = clBuildProgram(program, 1, &device, options, NULL, NULL); + test_error(error, "Test program did not properly build"); + + clKernelWrapper kernel = clCreateKernel(program, "sample_test", &error); + test_error(error, "Unable to create kernel"); + + size_t global_work_size = 4; + size_t uniform_local_work_size = 2; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size, + &uniform_local_work_size, 0, NULL, NULL); + test_error(error, + "Unable to enqueue NDRange kernel with uniform work group size"); + error = clFinish(queue); + test_error(error, "Unable to finish"); + + size_t non_uniform_local_work_size = 3; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size, + &non_uniform_local_work_size, 0, NULL, NULL); + + if (error != CL_INVALID_WORK_GROUP_SIZE) + { + log_error( + "Error: expected error 'CL_INVALID_WORK_GROUP_SIZE' (got '%s') " + "trying to enqueue kernel compiled with '%s' with non-uniform work " + "group size\n", + IGetErrorString(error), options); + return TEST_FAIL; + } + + return TEST_PASS; +} diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp index b5a134d1..672291ff 100644 --- a/test_conformance/compiler/test_compile.cpp +++ b/test_conformance/compiler/test_compile.cpp @@ -3983,6 +3983,9 @@ REGISTER_TEST(multiple_build_program) error = clEnqueueNDRangeKernel(queue, kernel0, 1, NULL, &num_threads, NULL, 0, NULL, NULL); test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); } { @@ -4001,10 +4004,10 @@ REGISTER_TEST(multiple_build_program) error = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &num_threads, NULL, 0, NULL, NULL); test_error(error, "clEnqueueNDRangeKernel failed"); - } - error = clFinish(queue); - test_error(error, "clFinish failed"); + error = clFinish(queue); + test_error(error, "clFinish failed"); + } std::vector test_values(num_threads, 0); error = clEnqueueReadBuffer(queue, out_stream_0, true, 0, diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index b9cfb608..04abde9a 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -56,6 +56,7 @@ const char *known_extensions[] = { "cl_khr_integer_dot_product", "cl_khr_subgroup_rotate", "cl_khr_kernel_clock", + "cl_khr_icd_unloadable", // API-only extensions after this point. If you add above here, modify // first_API_extension below. "cl_khr_icd", diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp index 0c868764..b3f1098d 100644 --- a/test_conformance/contractions/contractions.cpp +++ b/test_conformance/contractions/contractions.cpp @@ -191,7 +191,7 @@ double sse_mul_sd(double x, double y) } #endif -#ifdef __PPC__ +#if defined(__PPC__) || defined(__riscv) float ppc_mul(float a, float b) { float p; @@ -630,9 +630,11 @@ test_status InitCL( cl_device_id device ) // turn that off f3[i] = sse_mul(q, q2); f4[i] = sse_mul(-q, q2); -#elif defined(__PPC__) - // None of the current generation PPC processors support HW - // FTZ, emulate it in sw. +#elif (defined(__PPC__) || defined(__riscv)) + // RISC-V CPUs with default 'f' fp32 extension do not support + // enabling/disabling FTZ mode, subnormals are always handled + // without FTZ. None of the current generation PPC processors + // support HW FTZ, emulate it in sw. f3[i] = ppc_mul(q, q2); f4[i] = ppc_mul(-q, q2); #else @@ -721,9 +723,10 @@ test_status InitCL( cl_device_id device ) skipTest[j][i] = (bufSkip[i] || (gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW))))); -#if defined(__PPC__) - // Since the current Power processors don't emulate flush to zero in HW, - // it must be emulated in SW instead. +#if defined(__PPC__) || defined(__riscv) + // Since the current Power processors don't emulate flush to + // zero in HW, it must be emulated in SW instead. (same for + // RISC-V CPUs with 'f' extension) if (gForceFTZ) { if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f)) @@ -760,7 +763,6 @@ test_status InitCL( cl_device_id device ) } } - double *f = (double*) buf1; double *f2 = (double*) buf2; double *f3 = (double*) buf3_double; diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index 6846f780..496ea730 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p); uint64_t GetTime(void); void WriteInputBufferComplete(void *); -void *FlushToZero(void); -void UnFlushToZero(void *); } struct CalcRefValsBase diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp index 10d7b86d..eb0ea40f 100644 --- a/test_conformance/device_execution/enqueue_block.cpp +++ b/test_conformance/device_execution/enqueue_block.cpp @@ -340,7 +340,7 @@ static const char* enqueue_block_capture_event_profiling_info_before_execution[] set_user_event_status(user_evt, CL_COMPLETE); - void (^checkBlock) (void) = ^{ check_res(tid, &value, res); }; + void (^checkBlock) (void) = ^{ check_res(tid, value, res); }; enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock); if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp index 8f71ac4e..4307058a 100644 --- a/test_conformance/device_execution/enqueue_ndrange.cpp +++ b/test_conformance/device_execution/enqueue_ndrange.cpp @@ -129,8 +129,8 @@ static const char *helper_ndrange_2d_glo[] = { "}" NL, "" NL, "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, " - "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " - "val, __global uint* ofs_arr)" NL, + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global " + "atomic_uint* val, __global uint* ofs_arr)" NL, "{" NL, " size_t tid = get_global_id(0);" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, @@ -156,8 +156,8 @@ static const char *helper_ndrange_2d_loc[] = { "}" NL, "" NL, "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, " - "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " - "val, __global uint* ofs_arr)" NL, + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global " + "atomic_uint* val, __global uint* ofs_arr)" NL, "{" NL, " size_t tid = get_global_id(0);" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, @@ -193,8 +193,8 @@ static const char *helper_ndrange_2d_ofs[] = { "}" NL, "" NL, "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, " - "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " - "val, __global uint* ofs_arr)" NL, + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global " + "atomic_uint* val, __global uint* ofs_arr)" NL, "{" NL, " size_t tid = get_global_id(0);" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, @@ -233,8 +233,8 @@ static const char *helper_ndrange_3d_glo[] = { "}" NL, "" NL, "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, " - "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " - "val, __global uint* ofs_arr)" NL, + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global " + "atomic_uint* val, __global uint* ofs_arr)" NL, "{" NL, " size_t tid = get_global_id(0);" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, @@ -266,8 +266,8 @@ static const char *helper_ndrange_3d_loc[] = { "}" NL, "" NL, "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, " - "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " - "val, __global uint* ofs_arr)" NL, + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global " + "atomic_uint* val, __global uint* ofs_arr)" NL, "{" NL, " size_t tid = get_global_id(0);" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, @@ -306,8 +306,8 @@ static const char *helper_ndrange_3d_ofs[] = { "}" NL, "" NL, "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, " - "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* " - "val, __global uint* ofs_arr)" NL, + "__global uint* glob_size_arr, __global uint* loc_size_arr, __global " + "atomic_uint* val, __global uint* ofs_arr)" NL, "{" NL, " size_t tid = get_global_id(0);" NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL, diff --git a/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp b/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp index 97872613..8e8ced30 100644 --- a/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp +++ b/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp @@ -14,7 +14,10 @@ // #include "harness/typeWrappers.h" +#include "harness/extensionHelpers.h" #include +#include +#include #define BUF_SIZE 1024 #define BUF_SIZE_STR "1024" @@ -310,8 +313,8 @@ private: // A basic buffer used to pass the other buffer's address. error = clEnqueueWriteBuffer(queue, buffer_in_long, CL_TRUE, // block - 0, sizeof(cl_long), &DeviceAddrFromAPI, - 0, NULL, NULL); + 0, sizeof(DeviceAddrFromAPI), + &DeviceAddrFromAPI, 0, NULL, NULL); test_error_fail(error, "clEnqueueWriteBuffer of dev_addr_buffer failed\n"); @@ -322,9 +325,9 @@ private: &buffer_out_int); test_error_fail(error, "clSetKernelArg 1 failed\n"); - error = clSetKernelExecInfo(ind_access_kernel, - CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT, - sizeof(void *), &DeviceAddrFromAPI); + error = clSetKernelExecInfo( + ind_access_kernel, CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT, + sizeof(DeviceAddrFromAPI), &DeviceAddrFromAPI); test_error_fail(error, "Setting indirect access for " "device ptrs failed!\n"); @@ -421,6 +424,8 @@ private: REGISTER_TEST(private_address) { + REQUIRE_EXTENSION("cl_ext_buffer_device_address"); + BufferDeviceAddressTest test_fixture = BufferDeviceAddressTest( device, context, queue, CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT); @@ -435,3 +440,180 @@ REGISTER_TEST(private_address) return TEST_PASS; } + +REGISTER_TEST(private_address_multi_device) +{ + REQUIRE_EXTENSION("cl_ext_buffer_device_address"); + + cl_platform_id platform = 0; + cl_int error = CL_SUCCESS; + cl_uint numDevices = 0; + + error = clGetPlatformIDs(1, &platform, NULL); + test_error_ret(error, "Unable to get platform\n", TEST_FAIL); + + /* Get some devices */ + error = + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices); + test_error_ret(error, "Unable to get multiple devices\n", TEST_FAIL); + + if (numDevices < 2) + { + log_info( + "WARNING: multi device test unable to get multiple devices via " + "CL_DEVICE_TYPE_ALL (got %u devices). Skipping test...\n", + numDevices); + return TEST_SKIPPED_ITSELF; + } + + std::vector devices(numDevices); + error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, numDevices, + devices.data(), &numDevices); + test_error_ret(error, "Unable to get multiple devices\n", TEST_FAIL); + + GET_PFN(devices[0], clSetKernelArgDevicePointerEXT); + + cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, + (cl_context_properties)platform, 0 }; + clContextWrapper ctx = clCreateContext( + properties, numDevices, devices.data(), nullptr, nullptr, &error); + test_error_ret(error, "Unable to create context\n", TEST_FAIL); + + /* Create buffer */ + cl_mem_properties props[] = { CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT, CL_TRUE, + 0 }; + clMemWrapper buffer = clCreateBufferWithProperties( + ctx, props, CL_MEM_READ_WRITE, 16, nullptr, &error); + std::vector addresses(numDevices); + error = + clGetMemObjectInfo(buffer, CL_MEM_DEVICE_ADDRESS_EXT, + sizeof(cl_mem_device_address_ext) * addresses.size(), + addresses.data(), nullptr); + test_error_ret(error, "clGetMemObjectInfo failed\n", TEST_FAIL); + + std::vector queues(numDevices); + for (cl_uint i = 0; i < numDevices; ++i) + { + queues[i] = clCreateCommandQueue(ctx, devices[i], 0, &error); + test_error_ret(error, "Unable to create command queue\n", TEST_FAIL); + } + static std::string source = R"( + void kernel test_device_address( + global ulong* ptr, + ulong value) + { + *ptr = value; + })"; + + clProgramWrapper program; + clKernelWrapper kernel; + const char *source_ptr = source.data(); + error = create_single_kernel_helper(ctx, &program, &kernel, 1, &source_ptr, + "test_device_address"); + test_error(error, "Unable to create test kernel"); + for (cl_uint i = 0; i < numDevices; ++i) + { + cl_command_queue queue = queues[i]; + + error = clSetKernelArgDevicePointerEXT(kernel, 0, 0); + test_error_fail(error, + "clSetKernelArgDevicePointerEXT failed with NULL " + "pointer argument\n"); + + error = clSetKernelArgDevicePointerEXT(kernel, 0, addresses[i] + 8); + test_error_ret(error, "Unable to set kernel arg\n", TEST_FAIL); + + const cl_ulong pattern = 0xAABBCCDDEEFF0011 + i; + error = clSetKernelArg(kernel, 1, sizeof(pattern), &pattern); + test_error_ret(error, "Unable to set kernel arg\n", TEST_FAIL); + + size_t gwo = 0; + size_t gws = 1; + size_t lws = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, &gwo, &gws, &lws, 0, + nullptr, nullptr); + test_error_ret(error, "Unable to enqueue kernel\n", TEST_FAIL); + + error = clFinish(queue); + test_error_ret(error, "clFinish failed\n", TEST_FAIL); + + std::vector results(2, 0); + error = clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0, + results.size() * sizeof(cl_ulong), + results.data(), 0, nullptr, nullptr); + test_error_ret(error, "clEnqueueReadBuffer failed\n", TEST_FAIL); + + if (results[1] != pattern) + test_fail("Test value doesn't match expected value\n"); + } + return TEST_PASS; +} + +REGISTER_TEST(negative_private_address) +{ + REQUIRE_EXTENSION("cl_ext_buffer_device_address"); + + cl_int error = CL_SUCCESS; + + GET_PFN(device, clSetKernelArgDevicePointerEXT); + + /* Create buffer */ + clMemWrapper buffer = clCreateBufferWithProperties( + context, nullptr, CL_MEM_READ_WRITE, 16, nullptr, &error); + cl_mem_device_address_ext address; + error = clGetMemObjectInfo(buffer, CL_MEM_DEVICE_ADDRESS_EXT, + sizeof(cl_mem_device_address_ext), &address, + nullptr); + test_failure_error_ret( + error, CL_INVALID_OPERATION, + "clGetMemObjectInfo should return CL_INVALID_OPERATION when: " + "\"the buffer was not created with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT\"", + TEST_FAIL); + + static std::string source = R"( + void kernel test_device_address( + global ulong* ptr, + local ulong* ptr2, + ulong value) + { + *ptr = value; + })"; + + clProgramWrapper program; + clKernelWrapper kernel; + const char *source_ptr = source.data(); + error = create_single_kernel_helper(context, &program, &kernel, 1, + &source_ptr, "test_device_address"); + test_error(error, "Unable to create test kernel"); + + error = clSetKernelArgDevicePointerEXT(nullptr, 0, 0); + test_failure_error_ret( + error, CL_INVALID_KERNEL, + "clSetKernelArgDevicePointerEXT should return CL_INVALID_KERNEL when: " + "\"kernel is not a valid kernel object\"", + TEST_FAIL); + + error = clSetKernelArgDevicePointerEXT(kernel, 1, 0x15465); + test_failure_error_ret( + error, CL_INVALID_ARG_INDEX, + "clSetKernelArgDevicePointerEXT should return " + "CL_INVALID_ARG_INDEX when: " + "\"the expected kernel argument is not a pointer to global memory\"", + TEST_FAIL); + + error = clSetKernelArgDevicePointerEXT(kernel, 2, 0x15465); + test_failure_error_ret(error, CL_INVALID_ARG_INDEX, + "clSetKernelArgDevicePointerEXT should return " + "CL_INVALID_ARG_INDEX when: " + "\"the expected kernel argument is not a pointer\"", + TEST_FAIL); + + error = clSetKernelArgDevicePointerEXT(kernel, 3, 0x15465); + test_failure_error_ret(error, CL_INVALID_ARG_INDEX, + "clSetKernelArgDevicePointerEXT should return " + "CL_INVALID_ARG_INDEX when: " + "\"arg_index is not a valid argument index\"", + TEST_FAIL); + + return TEST_PASS; +} diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp index aee287fd..56be5c65 100644 --- a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp +++ b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp @@ -42,8 +42,8 @@ int test_cxx_for_opencl(cl_device_id device, cl_context context, execute(*p, x); })"; - error = create_single_kernel_helper_with_build_options( - context, &program, &kernel1, 1, &kernel_sstr, "k1", "-cl-std=CLC++"); + error = create_single_kernel_helper(context, &program, &kernel1, 1, + &kernel_sstr, "k1", "-cl-std=CLC++"); test_error(error, "Failed to create k1 kernel"); kernel2 = clCreateKernel(program, "k2", &error); diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt index 9e54fecc..1a50db93 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt @@ -18,6 +18,7 @@ set(${MODULE_NAME}_SOURCES command_buffer_test_event_info.cpp command_buffer_finalize.cpp command_buffer_pipelined_enqueue.cpp + command_buffer_kernel_attributes.cpp negative_command_buffer_finalize.cpp negative_command_buffer_svm_mem.cpp negative_command_buffer_copy_image.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index aed183ff..bb085035 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -16,6 +16,7 @@ set(${MODULE_NAME}_SOURCES mutable_command_work_groups.cpp mutable_command_work_dim.cpp mutable_command_update_state.cpp + mutable_command_defer_arguments.cpp ../basic_command_buffer.cpp ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h index 59f07dd7..917183c8 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h @@ -76,12 +76,10 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest bool Skip() override { - bool extension_avaliable = - is_extension_available(device, - "cl_khr_command_buffer_mutable_dispatch") - == true; + bool extension_available = is_extension_available( + device, "cl_khr_command_buffer_mutable_dispatch"); - if (extension_avaliable) + if (extension_available) { Version device_version = get_device_cl_version(device); if ((device_version >= Version(3, 0)) @@ -91,12 +89,12 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest cl_version extension_version = get_extension_version( device, "cl_khr_command_buffer_mutable_dispatch"); - if (extension_version != CL_MAKE_VERSION(0, 9, 4)) + if (extension_version != CL_MAKE_VERSION(0, 9, 5)) { log_info("cl_khr_command_buffer_mutable_dispatch version " - "0.9.4 is " + "0.9.5 is " "required to run the test, skipping.\n "); - extension_avaliable = false; + extension_available = false; } } } @@ -109,7 +107,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest sizeof(mutable_capabilities), &mutable_capabilities, nullptr) && mutable_capabilities != 0; - return !mutable_support || !extension_avaliable + return !mutable_support || !extension_available || BasicCommandBufferTest::Skip(); } diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_defer_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_defer_arguments.cpp new file mode 100644 index 00000000..2e10cbf8 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_defer_arguments.cpp @@ -0,0 +1,219 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "testHarness.h" +#include "mutable_command_basic.h" + +namespace { + +//////////////////////////////////////////////////////////////////////////////// +// Mutable dispatch test which handles the case where all the arguments of a +// kernel aren't set when a kernel is initially added to a mutable +// command-buffer, but deferred until an update is made to the command to set +// them before command-buffer enqueue. +struct MutableDispatchDeferArguments : public BasicMutableCommandBufferTest +{ + MutableDispatchDeferArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + if (BasicMutableCommandBufferTest::Skip()) return true; + cl_mutable_dispatch_fields_khr mutable_capabilities; + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + // require mutable arguments capability + return !mutable_support; + } + + cl_int SetUpKernel() override + { + // Create kernel + const char *defer_args_kernel = + R"( + __kernel void defer_args_test(__constant int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src[tid]; + })"; + + cl_int error = + create_single_kernel_helper(context, &program, &kernel, 1, + &defer_args_kernel, "defer_args_test"); + test_error(error, "Creating kernel failed"); + return CL_SUCCESS; + } + + cl_int SetUpKernelArgs() override + { + // Create and initialize buffers + MTdataHolder d(gRandomSeed); + + src_data.resize(num_elements); + for (size_t i = 0; i < num_elements; i++) + src_data[i] = (cl_int)genrand_int32(d); + + cl_int error = CL_SUCCESS; + in_mem = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + num_elements * sizeof(cl_int), src_data.data(), + &error); + test_error(error, "Creating src buffer"); + + out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), NULL, &error); + test_error(error, "Creating initial dst buffer failed"); + + // Only set a single kernel argument, leaving argument at index 1 unset + error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem); + test_error(error, "Unable to set src kernel arguments"); + + return CL_SUCCESS; + } + + bool verify_state(cl_command_buffer_state_khr expected) + { + cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0); + cl_int error = clGetCommandBufferInfoKHR( + command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state), &state, + nullptr); + if (error != CL_SUCCESS) + { + log_error("clGetCommandBufferInfoKHR failed: %d", error); + return false; + } + + if (state != expected) + { + log_error("Unexpected result of CL_COMMAND_BUFFER_STATE_KHR query. " + "Expected %u, but was %u\n", + expected, state); + return false; + } + return true; + } + + bool verify_result(const cl_mem &buffer) + { + std::vector data(num_elements); + cl_int error = + clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(), + data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (data[i] != src_data[i]) + { + log_error("Modified verification failed at index %zu: Got %d, " + "wanted %d\n", + i, data[i], src_data[i]); + return false; + } + } + return true; + } + + cl_int Run() override + { + // Create command while the kernel still has the second argument unset. + // Passing 'CL_MUTABLE_DISPATCH_ARGUMENTS_KHR' as a property means this + // shouldn't be an error. + cl_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + // Finalizing the command buffer shouldn't be an error, but result in + // the command-buffer entering the CL_COMMAND_BUFFER_STATE_FINALIZED + // state. + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + if (!verify_state(CL_COMMAND_BUFFER_STATE_FINALIZED_KHR)) + { + return TEST_FAIL; + } + + // Check that trying to enqueue the command-buffer in this state is an + // error, as it needs to be in the executable state. + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_failure_error_ret(error, CL_INVALID_OPERATION, + "clEnqueueCommandBufferKHR should return " + "CL_INVALID_OPERATION", + TEST_FAIL); + + // Update the kernel command to set the missing argument. + cl_mutable_dispatch_arg_khr arg{ 1, sizeof(out_mem), &out_mem }; + cl_mutable_dispatch_config_khr dispatch_config{ + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_uint num_configs = 1; + cl_command_buffer_update_type_khr config_types[1] = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR + }; + const void *configs[1] = { &dispatch_config }; + error = clUpdateMutableCommandsKHR(command_buffer, num_configs, + config_types, configs); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + // Now that all the arguments have been set, verify the + // command-buffer has entered the executable state. + if (!verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR)) + { + return TEST_FAIL; + } + + // Execute command-buffer and verify results are expected + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + if (!verify_result(out_mem)) return TEST_FAIL; + + return TEST_PASS; + } + + cl_mutable_command_khr command; + std::vector src_data; +}; + +} // anonymous namespace + +REGISTER_TEST(mutable_dispatch_defer_arguments) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp index b5c8ecd9..8919d5c2 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp @@ -20,6 +20,7 @@ #include #include +#include #include namespace { @@ -126,7 +127,20 @@ struct MutableCommandFullDispatch : InfoMutableCommandBufferTest &workgroupinfo_size, NULL); test_error(error, "clGetKernelWorkGroupInfo failed"); - group_size = std::min(num_elements, workgroupinfo_size); + cl_uint max_work_dimension = 0; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(max_work_dimension), &max_work_dimension, + NULL); + test_error(error, "clGetDeviceInfo failed"); + + std::vector max_work_item_sizes(max_work_dimension, 0); + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(size_t) * max_work_item_sizes.size(), + max_work_item_sizes.data(), NULL); + test_error(error, "clGetDeviceInfo failed"); + + group_size = std::min( + { num_elements, workgroupinfo_size, max_work_item_sizes[0] }); const size_t size_to_allocate_src = group_size * sizeof(cl_int); // create and initialize source buffer diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp new file mode 100644 index 00000000..4eddd1d8 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp @@ -0,0 +1,235 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "basic_command_buffer.h" + +namespace { + +//////////////////////////////////////////////////////////////////////////////// +// Tests for cl_khr_command_buffer while enqueueing a kernel with a +// reqd_work_group_size with a NULL local_work_size. + +struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest +{ + inline static const std::string body_str = R"( + __kernel void wg_size(__global int* dst) + { + if (get_global_id(0) == 0 && + get_global_id(1) == 0 && + get_global_id(2) == 0) { + dst[0] = get_local_size(0); + dst[1] = get_local_size(1); + dst[2] = get_local_size(2); + } + } + )"; + + KernelAttributesReqGroupSizeTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicCommandBufferTest(device, context, queue), dst(nullptr), + clGetKernelSuggestedLocalWorkSizeKHR(nullptr), + device_max_work_group_size(0) + {} + + cl_int SetUp(int elements) override + { + cl_int error = BasicCommandBufferTest::SetUp(elements); + test_error(error, "BasicCommandBufferTest::SetUp failed"); + + if (is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + cl_platform_id platform = nullptr; + error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, + sizeof(platform), &platform, NULL); + test_error(error, "clGetDeviceInfo for platform failed"); + + clGetKernelSuggestedLocalWorkSizeKHR = + (clGetKernelSuggestedLocalWorkSizeKHR_fn) + clGetExtensionFunctionAddressForPlatform( + platform, "clGetKernelSuggestedLocalWorkSizeKHR"); + test_assert_error(clGetKernelSuggestedLocalWorkSizeKHR != nullptr, + "Couldn't get function pointer for " + "clGetKernelSuggestedLocalWorkSizeKHR"); + } + + dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int), + nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + + cl_uint device_max_dim = 0; + error = + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(device_max_dim), &device_max_dim, nullptr); + test_error( + error, + "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed"); + test_assert_error( + device_max_dim >= 3, + "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS must be at least 3!"); + + device_max_work_item_sizes.resize(device_max_dim); + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(size_t) * device_max_dim, + device_max_work_item_sizes.data(), nullptr); + + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(device_max_work_group_size), + &device_max_work_group_size, nullptr); + test_error(error, + "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed"); + + return CL_SUCCESS; + } + + cl_int Run() override + { + cl_int error = CL_SUCCESS; + struct KernelAttribInfo + { + cl_int wgs[3]; + cl_uint min_dim; + }; + + std::vector attribs = { { { 2, 1, 1 }, 1 }, + { { 2, 3, 1 }, 2 }, + { { 2, 3, 4 }, 3 } }; + + for (auto& attrib : attribs) + { + const std::string attrib_str = + "__attribute__((reqd_work_group_size(" + + std::to_string(attrib.wgs[0]) + "," + + std::to_string(attrib.wgs[1]) + "," + + std::to_string(attrib.wgs[2]) + ")))"; + const std::string source_str = attrib_str + body_str; + const char* source = source_str.c_str(); + + clProgramWrapper program; + clKernelWrapper kernel; + error = create_single_kernel_helper(context, &program, &kernel, 1, + &source, "wg_size"); + test_error(error, "Unable to create test kernel"); + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst); + test_error(error, "clSetKernelArg failed"); + + for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++) + { + const size_t test_work_group_size = + attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2]; + if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0] + || (size_t)attrib.wgs[1] > device_max_work_item_sizes[1] + || (size_t)attrib.wgs[2] > device_max_work_item_sizes[2] + || test_work_group_size > device_max_work_group_size) + { + log_info( + "Skipping test for work_dim = %u: required work group " + "size (%i, %i, %i) (total %zu) exceeds device max " + "work group size (%zu, %zu, %zu) (total %zu)\n", + work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2], + test_work_group_size, device_max_work_item_sizes[0], + device_max_work_item_sizes[1], + device_max_work_item_sizes[2], + device_max_work_group_size); + continue; + } + + const cl_int zero = 0; + error = clCommandFillBufferKHR( + command_buffer, nullptr, nullptr, dst, &zero, sizeof(zero), + 0, sizeof(attrib.wgs), 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 }; + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, work_dim, nullptr, + global_work_size, nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + cl_int results[3] = { -1, -1, -1 }; + error = + clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(results), + results, 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the result + if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1] + || results[2] != attrib.wgs[2]) + { + log_error( + "Executed local size mismatch with work_dim = %u: " + "Expected (%d,%d,%d) got (%d,%d,%d)\n", + work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2], + results[0], results[1], results[2]); + return TEST_FAIL; + } + + if (clGetKernelSuggestedLocalWorkSizeKHR != nullptr) + { + size_t suggested[3] = { 1, 1, 1 }; + error = clGetKernelSuggestedLocalWorkSizeKHR( + queue, kernel, work_dim, nullptr, global_work_size, + suggested); + test_error(error, + "clGetKernelSuggestedLocalWorkSizeKHR failed"); + + if (suggested[0] != (size_t)attrib.wgs[0] + || suggested[1] != (size_t)attrib.wgs[1] + || suggested[2] != (size_t)attrib.wgs[2]) + { + log_error( + "Suggested local size mismatch with work_dim = " + "%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n", + work_dim, attrib.wgs[0], attrib.wgs[1], + attrib.wgs[2], suggested[0], suggested[1], + suggested[2]); + return TEST_FAIL; + } + } + + // create new command buffer + command_buffer = + clCreateCommandBufferKHR(1, &queue, nullptr, &error); + test_error(error, "clCreateCommandBufferKHR failed"); + } + } + + return CL_SUCCESS; + } + + clMemWrapper dst; + clGetKernelSuggestedLocalWorkSizeKHR_fn + clGetKernelSuggestedLocalWorkSizeKHR; + + size_t device_max_work_group_size; + std::vector device_max_work_item_sizes; +}; + +} // anonymous namespace + +REGISTER_TEST(command_null_required_work_group_size) +{ + return MakeAndRunTest( + device, context, queue, num_elements); +} diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp index 8900e6ff..c1079a4d 100644 --- a/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp @@ -20,4 +20,4 @@ int main(int argc, const char *argv[]) { return runTestHarness(argc, argv, test_registry::getInstance().num_tests(), test_registry::getInstance().definitions(), false, 0); -} \ No newline at end of file +} diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp index 5151a668..6220a87e 100644 --- a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp @@ -23,9 +23,32 @@ #include #include "debug_ahb.h" -static bool isAHBUsageReadable(const AHardwareBuffer_UsageFlags usage) +static bool isAHBUsageReadableHost(AHardwareBuffer_UsageFlags usage) { - return (AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE & usage) != 0; + return (AHARDWAREBUFFER_USAGE_CPU_READ_MASK & usage) != 0; +} + +static bool isAHBUsageWritableHost(AHardwareBuffer_UsageFlags usage) +{ + return (AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK & usage) != 0; +} + +static bool isAHBUsageReadableDevice(const AHardwareBuffer_UsageFlags usage) +{ + return ((AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE + | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER + | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) + & usage) + != 0; +} + +static cl_ulong getMaxAllocSize(cl_device_id device) +{ + cl_ulong ret; + cl_int err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(cl_ulong), &ret, nullptr); + test_error(err, "clGetDeviceInfo failed"); + return ret; } struct ahb_format_table @@ -50,6 +73,32 @@ ahb_image_size_table test_sizes[] = { { 64, 64 }, { 128, 128 }, { 256, 256 }, { 512, 512 } }; +uint32_t test_buffer_sizes[] = { 2, 8, 32, 128, 512, 2048, 16384, 65536 }; + +ahb_usage_table test_buffer_usages[] = { + { static_cast( + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY + | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) }, + { static_cast( + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY + | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) }, + { static_cast( + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN + | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) }, + { static_cast( + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN + | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) }, + { static_cast( + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY) }, + { static_cast( + AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN) } +}; + ahb_usage_table test_usages[] = { { static_cast( AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN @@ -96,6 +145,23 @@ static const char *diff_images_kernel_source = { })" }; +static const char *lifetime_kernel_source = { + R"( + __kernel void increment_buffer(global uchar* buffer) + { + int tid = get_global_id(0); + buffer[tid] ++; + } + + __kernel void set_image_color(write_only image2d_t ahb_image, float4 set_color) + { + int tidX = get_global_id(0); + int tidY = get_global_id(1); + + write_imagef(ahb_image, (int2)( tidX, tidY ), set_color); + })" +}; + // Checks that the inferred image format is correct REGISTER_TEST(images) { @@ -217,8 +283,9 @@ REGISTER_TEST(images_read) aHardwareBufferDesc.format = format.aHardwareBufferFormat; for (auto usage : test_usages) { - // Filter out usage flags that are not readable on device - if (!isAHBUsageReadable(usage.usageFlags)) + if (!(isAHBUsageReadableHost(usage.usageFlags) + && isAHBUsageWritableHost(usage.usageFlags) + && isAHBUsageReadableDevice(usage.usageFlags))) { continue; } @@ -512,8 +579,9 @@ REGISTER_TEST(enqueue_read_image) aHardwareBufferDesc.format = format.aHardwareBufferFormat; for (auto usage : test_usages) { - // Filter out usage flags that are not readable on device - if (!isAHBUsageReadable(usage.usageFlags)) + if (!(isAHBUsageReadableHost(usage.usageFlags) + && isAHBUsageWritableHost(usage.usageFlags) + && isAHBUsageReadableDevice(usage.usageFlags))) { continue; } @@ -690,8 +758,9 @@ REGISTER_TEST(enqueue_copy_image) aHardwareBufferDesc.format = format.aHardwareBufferFormat; for (auto usage : test_usages) { - // Filter out usage flags that are not readable on device - if (!isAHBUsageReadable(usage.usageFlags)) + if (!(isAHBUsageReadableHost(usage.usageFlags) + && isAHBUsageWritableHost(usage.usageFlags) + && isAHBUsageReadableDevice(usage.usageFlags))) { continue; } @@ -993,8 +1062,9 @@ REGISTER_TEST(enqueue_copy_image_to_buffer) aHardwareBufferDesc.format = format.aHardwareBufferFormat; for (auto usage : test_usages) { - // Filter out usage flags that are not readable on device - if (!isAHBUsageReadable(usage.usageFlags)) + if (!(isAHBUsageReadableHost(usage.usageFlags) + && isAHBUsageWritableHost(usage.usageFlags) + && isAHBUsageReadableDevice(usage.usageFlags))) { continue; } @@ -1181,8 +1251,8 @@ REGISTER_TEST(enqueue_copy_buffer_to_image) aHardwareBufferDesc.format = format.aHardwareBufferFormat; for (auto usage : test_usages) { - // Filter out usage flags that are not readable on device - if (!isAHBUsageReadable(usage.usageFlags)) + if (!(isAHBUsageReadableHost(usage.usageFlags) + && isAHBUsageReadableDevice(usage.usageFlags))) { continue; } @@ -1222,8 +1292,8 @@ REGISTER_TEST(enqueue_copy_buffer_to_image) imageInfo.type = format.clMemObjectType; imageInfo.width = resolution.width; imageInfo.height = resolution.height; - imageInfo.rowPitch = resolution.width * resolution.height - * pixelSize; // data is tightly packed in buffer + // data is tightly packed in buffer + imageInfo.rowPitch = resolution.width * pixelSize; test_assert_error(imageInfo.rowPitch >= pixelSize * imageInfo.width, "Row pitch is smaller than width"); @@ -1376,8 +1446,8 @@ REGISTER_TEST(enqueue_write_image) aHardwareBufferDesc.format = format.aHardwareBufferFormat; for (auto usage : test_usages) { - // Filter out usage flags that are not readable on device - if (!isAHBUsageReadable(usage.usageFlags)) + if (!(isAHBUsageReadableHost(usage.usageFlags) + && isAHBUsageReadableDevice(usage.usageFlags))) { continue; } @@ -1429,8 +1499,8 @@ REGISTER_TEST(enqueue_write_image) imageInfo.type = format.clMemObjectType; imageInfo.width = resolution.width; imageInfo.height = resolution.height; - imageInfo.rowPitch = resolution.width * resolution.height - * pixelSize; // Data is tightly packed + // Data is tightly packed + imageInfo.rowPitch = resolution.width * pixelSize; test_assert_error(imageInfo.rowPitch >= pixelSize * imageInfo.width, "Row pitch is smaller than width"); @@ -1568,8 +1638,8 @@ REGISTER_TEST(enqueue_fill_image) aHardwareBufferDesc.format = format.aHardwareBufferFormat; for (auto usage : test_usages) { - // Filter out usage flags that are not readable on device - if (!isAHBUsageReadable(usage.usageFlags)) + if (!(isAHBUsageReadableHost(usage.usageFlags) + && isAHBUsageReadableDevice(usage.usageFlags))) { continue; } @@ -1620,8 +1690,8 @@ REGISTER_TEST(enqueue_fill_image) imageInfo.type = format.clMemObjectType; imageInfo.width = resolution.width; imageInfo.height = resolution.height; - imageInfo.rowPitch = resolution.width * resolution.height - * pixelSize; // Data is tightly packed + imageInfo.rowPitch = resolution.width * pixelSize; + // Data is tightly packed test_assert_error(imageInfo.rowPitch >= pixelSize * imageInfo.width, "Row pitch is smaller than width"); @@ -1857,3 +1927,412 @@ REGISTER_TEST(blob) return TEST_PASS; } + +/* + * For cl buffer and cl image + * Create a AHB + * Create a mem object from the AHB + * Release the AHB + * Read and write using the mem object + * Verify reads and writes + */ +REGISTER_TEST(lifetime_buffer) +{ + REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer"); + + cl_int err; + constexpr cl_uint buffer_size = 4096; + std::vector host_buffer(buffer_size, 1); + clMemWrapper imported_buffer; + + { + // Check if AHB descriptors for buffers and images are supported + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.width = buffer_size; + aHardwareBufferDesc.height = 1; + aHardwareBufferDesc.layers = 1; + aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN; + + if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) + { + log_unsupported_ahb_format(aHardwareBufferDesc); + return TEST_SKIPPED_ITSELF; + } + + log_info("Testing buffer lifetime\n"); + + AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc); + + const cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + aHardwareBuffer.get_props(), + 0, + }; + + imported_buffer = clCreateBufferWithProperties( + context, props, CL_MEM_READ_WRITE, 0, nullptr, &err); + test_error(err, "Failed to create CL buffer from AHardwareBuffer"); + + // Fill AHB buffer + void *data_ptr = nullptr; + int ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, nullptr, + &data_ptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", ahb_result); + return TEST_FAIL; + } + + memcpy(data_ptr, host_buffer.data(), buffer_size); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + } // Release test scope reference to AHB + + + // Verify buffer read by comparing to host buffer + std::vector read_buffer(buffer_size); + err = clEnqueueReadBuffer(queue, imported_buffer, true, 0, buffer_size, + read_buffer.data(), 0, nullptr, nullptr); + test_error(err, "failed clEnqueueReadBuffer"); + + for (size_t i = 0; i < buffer_size; i++) + { + if (read_buffer[i] != host_buffer[i]) + { + log_error("At position %zu expected value: %u but got value: %u\n", + i, host_buffer[i], read_buffer[i]); + return TEST_FAIL; + } + } + + // Attempt buffer write + clProgramWrapper program; + clKernelWrapper kernel; + + err = create_single_kernel_helper(context, &program, &kernel, 1, + &lifetime_kernel_source, + "increment_buffer"); + test_error(err, "kernel creation failed"); + + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_buffer); + test_error(err, "clSetKernelArg failed"); + + size_t gws[1] = { buffer_size }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, gws, nullptr, 0, + nullptr, nullptr); + test_error(err, "Failed clEnqueueNDRangeKernel"); + + // Verify write + err = clEnqueueReadBuffer(queue, imported_buffer, true, 0, buffer_size, + read_buffer.data(), 0, nullptr, nullptr); + test_error(err, "failed clEnqueueReadBuffer"); + + for (size_t i = 0; i < buffer_size; i++) + { + if (read_buffer[i] + != host_buffer[i] + 1) // Kernel incremented each index by 1 + { + log_error("At position %zu expected value: %u but got value: %u\n", + i, host_buffer[i], read_buffer[i]); + return TEST_FAIL; + } + } + + return TEST_PASS; +} + + +REGISTER_TEST(lifetime_image) +{ + REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer"); + + int err; + const AHardwareBuffer_Format aHardwareBufferFormat = + AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM; + const cl_image_format clImageFormat = { CL_RGBA, CL_UNORM_INT8 }; + const size_t pixel_size = get_pixel_size(&clImageFormat); + + for (auto resolution : test_sizes) + { + const size_t image_size = + resolution.width * resolution.height * pixel_size; + + std::vector host_image_data(image_size, 1); + clMemWrapper imported_image; + { + // Check if AHB descriptors for buffers and images are supported + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + aHardwareBufferDesc.format = aHardwareBufferFormat; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN; + + if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) + { + log_unsupported_ahb_format(aHardwareBufferDesc); + continue; + } + + log_info("Testing image lifetime\n"); + + AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc); + + const cl_mem_properties props_image[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + aHardwareBuffer.get_props(), + 0, + }; + + imported_image = clCreateImageWithProperties( + context, props_image, CL_MEM_READ_WRITE, nullptr, nullptr, + nullptr, &err); + test_error(err, "Failed to create CL image from AHardwareBuffer"); + + void *data_ptr = nullptr; + int ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, + nullptr, &data_ptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + memcpy(data_ptr, host_image_data.data(), image_size); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + } // Release test scope reference to AHB + + + // Verify image read using host data + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { resolution.width, resolution.height, 1 }; + size_t row_pitch; + uint8_t *mapped_image_ptr = static_cast(clEnqueueMapImage( + queue, imported_image, true, CL_MAP_READ, origin, region, + &row_pitch, nullptr, 0, nullptr, nullptr, &err)); + test_error(err, "clEnqueueMapImage failed"); + + for (size_t row = 0; row < resolution.height; ++row) + { + for (size_t col = 0; col < resolution.width; ++col) + { + size_t mapped_image_idx = row * row_pitch + col; + size_t host_image_idx = row * resolution.width + col; + if (mapped_image_ptr[mapped_image_idx] + != host_image_data[host_image_idx]) + { + log_error( + "At position (%zu, %zu) expected value: %u but got " + "value: %u\n", + row, col, host_image_data[host_image_idx], + mapped_image_ptr[mapped_image_idx]); + return TEST_FAIL; + } + } + } + + err = clEnqueueUnmapMemObject(queue, imported_image, mapped_image_ptr, + 0, nullptr, nullptr); + test_error(err, "clEnqueueUnmapMemObject failed"); + + err = clFinish(queue); + test_error(err, "clFinish failed"); + + + // Attempt image write + clProgramWrapper program; + clKernelWrapper kernel; + err = create_single_kernel_helper(context, &program, &kernel, 1, + &lifetime_kernel_source, + "set_image_color"); + test_error(err, "kernel creation failed"); + + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_image); + test_error(err, "clSetKernelArg failed"); + + cl_float4 color = { { 0.5f, 0.5f, 0.5f, 0.5f } }; + err = clSetKernelArg(kernel, 1, sizeof(cl_float4), &color); + test_error(err, "clSetKernelArg failed"); + + std::vector gws = { resolution.width, resolution.height }; + err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, gws.data(), + nullptr, 0, nullptr, nullptr); + test_error(err, "Failed clEnqueueNDRangeKernel"); + + err = clFinish(queue); + test_error(err, "clFinish failed"); + + + // Verify image write + mapped_image_ptr = static_cast(clEnqueueMapImage( + queue, imported_image, true, CL_MAP_READ, origin, region, + &row_pitch, nullptr, 0, nullptr, nullptr, &err)); + test_error(err, "clEnqueueMapImage failed"); + + for (size_t row = 0; row < resolution.height; ++row) + { + for (size_t col = 0; col < resolution.width; ++col) + { + size_t mapped_image_idx = row * row_pitch + col; + if (128 != mapped_image_ptr[mapped_image_idx]) + { + log_error( + "At position (%zu, %zu) expected value: %u but got " + "value: %u\n", + row, col, 128, mapped_image_ptr[mapped_image_idx]); + return TEST_FAIL; + } + } + } + + err = clEnqueueUnmapMemObject(queue, imported_image, mapped_image_ptr, + 0, nullptr, nullptr); + test_error(err, "clEnqueueUnmapMemObject failed"); + + err = clFinish(queue); + test_error(err, "clFinish failed"); + } + return TEST_PASS; +} + + +/* Testing clCreateSubBuffer + * Create AHB + * Write to AHB + * Create CL buffer from AHB + * Create a sub buffer into half of the buffer + * Read & verify sub buffer + */ +REGISTER_TEST(sub_buffer) +{ + cl_int err; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB; + for (auto usage : test_buffer_usages) + { + if (!(isAHBUsageReadableHost(usage.usageFlags) + && isAHBUsageWritableHost(usage.usageFlags) + && isAHBUsageReadableDevice(usage.usageFlags))) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (uint32_t buffer_size : test_buffer_sizes) + { + if (buffer_size > getMaxAllocSize(device)) + { + continue; + } + + aHardwareBufferDesc.width = buffer_size; + aHardwareBufferDesc.height = 1; + aHardwareBufferDesc.layers = 1; + if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) + { + log_unsupported_ahb_format(aHardwareBufferDesc); + continue; + } + + AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc); + + log_info("Testing usage: %s, buffer size: %u\n", + ahardwareBufferDecodeUsageFlagsToString(usage.usageFlags) + .c_str(), + buffer_size); + + void *hardware_buffer_data = nullptr; + int ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + std::vector host_buffer(buffer_size); + + generate_random_data(ExplicitType::kUnsignedChar, buffer_size, seed, + host_buffer.data()); + + memcpy(hardware_buffer_data, host_buffer.data(), buffer_size); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + aHardwareBuffer.get_props(), 0 + }; + + clMemWrapper buffer = clCreateBufferWithProperties( + context, props, CL_MEM_READ_WRITE, 0, nullptr, &err); + test_error(err, "Failed to create CL buffer from AHardwareBuffer"); + + cl_uint sub_buffer_size = buffer_size / 2; + cl_buffer_region region = { 0 }; + region.origin = 0; + region.size = sub_buffer_size; + + clMemWrapper sub_buffer = + clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + test_error(err, "clCreateSubBuffer failed"); + + std::vector host_sub_buffer(sub_buffer_size); + err = clEnqueueReadBuffer(queue, sub_buffer, true, 0, + sub_buffer_size, host_sub_buffer.data(), + 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < sub_buffer_size; ++i) + { + if (host_buffer[i] != host_sub_buffer[i]) + { + log_error( + "At position i=%zu expected value %u but got %u\n", i, + host_buffer[i], host_sub_buffer[i]); + return TEST_FAIL; + } + } + } + } + return TEST_PASS; +} diff --git a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt index 1161e519..34d9034b 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt @@ -3,7 +3,6 @@ set(MODULE_NAME CL_KHR_EXTERNAL_SEMAPHORE) set(${MODULE_NAME}_SOURCES main.cpp test_external_semaphore.cpp - test_external_semaphore_sync_fd.cpp ) set (CLConform_VULKAN_LIBRARIES_DIR "${VULKAN_LIB_DIR}") @@ -18,7 +17,6 @@ include_directories (${CLConform_INCLUDE_DIR}) list(APPEND CLConform_LIBRARIES vulkan_wrapper) set(CMAKE_COMPILE_WARNING_AS_ERROR OFF) -set(CMAKE_CXX_FLAGS "-fpermissive") include_directories("../../common/vulkan_wrapper") diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp index 198bf046..1e9fd74a 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp @@ -197,8 +197,11 @@ REGISTER_TEST_VERSION(external_semaphores_queries, Version(1, 2)) return TEST_PASS; } -REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2)) +cl_int doTestImportExport(cl_device_id device, cl_context contexts[2], + cl_command_queue queues[2]) { + cl_int err = CL_SUCCESS; + REQUIRE_EXTENSION("cl_khr_external_semaphore"); GET_PFN(device, clEnqueueSignalSemaphoresKHR); @@ -210,7 +213,6 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2)) std::vector import_handle_types; std::vector export_handle_types; - cl_int err = CL_SUCCESS; err = get_device_semaphore_handle_types( device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, import_handle_types); @@ -237,17 +239,9 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2)) export_handle_types.begin(), export_handle_types.end(), std::back_inserter(import_export_handle_types)); - cl_context context2 = - clCreateContext(NULL, 1, &device, notify_callback, NULL, &err); - test_error(err, "Failed to create context2"); - - clCommandQueueWrapper queue1 = - clCreateCommandQueue(context, device, 0, &err); - test_error(err, "Could not create command queue"); - - clCommandQueueWrapper queue2 = - clCreateCommandQueue(context2, device, 0, &err); - test_error(err, "Could not create command queue"); + cl_context& context2 = contexts[1]; + cl_command_queue& queue1 = queues[0]; + cl_command_queue& queue2 = queues[1]; if (import_export_handle_types.empty()) { @@ -270,7 +264,7 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2)) // Signal semaphore on context1 cl_semaphore_khr exportable_semaphore = - clCreateSemaphoreWithPropertiesKHR(context, export_props, &err); + clCreateSemaphoreWithPropertiesKHR(contexts[0], export_props, &err); test_error(err, "Failed to create exportable semaphore"); err = clEnqueueSignalSemaphoresKHR(queue1, 1, &exportable_semaphore, @@ -313,12 +307,77 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2)) test_error(err, "Failed to release semaphore"); } - err = clReleaseContext(context2); - test_error(err, "Failed to release context2"); - return TEST_PASS; } +REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2)) +{ + cl_int err = CL_SUCCESS; + + clContextWrapper context_sec = + clCreateContext(NULL, 1, &device, notify_callback, NULL, &err); + test_error(err, "Failed to create context2"); + cl_context contexts[2] = { context, context_sec }; + + clCommandQueueWrapper queue0 = + clCreateCommandQueue(context, device, 0, &err); + test_error(err, "Could not create command queue"); + + clCommandQueueWrapper queue1 = + clCreateCommandQueue(contexts[1], device, 0, &err); + test_error(err, "Could not create command queue"); + cl_command_queue queues[2] = { queue0, queue1 }; + + return doTestImportExport(device, contexts, queues); +} + +REGISTER_TEST_VERSION(external_semaphores_import_export, Version(1, 2)) +{ + cl_int err = CL_SUCCESS; + cl_int total_status = TEST_PASS; + + // test external semaphores with out-of-order queue + { + cl_command_queue_properties device_props = 0; + err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, + sizeof(device_props), &device_props, NULL); + test_error(err, + "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed"); + + if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0) + { + // Create ooo queue + clCommandQueueWrapper test_queue = clCreateCommandQueue( + context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); + test_error(err, "Could not create command queue"); + + cl_command_queue queues[2] = { test_queue, test_queue }; + cl_context contexts[2] = { context, context }; + + cl_int status = doTestImportExport(device, contexts, queues); + if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF) + { + total_status = TEST_FAIL; + } + } + } + + // test external semaphore sync fd with in-order harness queue + { + cl_command_queue queues[2] = { queue, queue }; + cl_context contexts[2] = { context, context }; + + cl_int status = doTestImportExport(device, contexts, queues); + if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF) + { + total_status = TEST_FAIL; + } + } + + return total_status; +} + + // Confirm that a signal followed by a wait will complete successfully REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2)) { @@ -366,9 +425,14 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2)) // Signal semaphore clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 0, nullptr, &signal_event); - test_error(err, "Could not signal semaphore"); + if (vkExternalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + err = clEnqueueSignalSemaphoresKHR( + queue, 1, &sema_ext.getCLSemaphore(), nullptr, 0, nullptr, + &signal_event); + test_error(err, "Could not signal semaphore"); + } // Wait semaphore clEventWrapper wait_event; @@ -381,7 +445,11 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2)) test_error(err, "Could not finish queue"); // Ensure all events are completed - test_assert_event_complete(signal_event); + if (vkExternalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + test_assert_event_complete(signal_event); + } test_assert_event_complete(wait_event); } @@ -405,6 +473,7 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2)) // Obtain pointers to semaphore's API GET_PFN(device, clEnqueueSignalSemaphoresKHR); GET_PFN(device, clEnqueueWaitSemaphoresKHR); + GET_PFN(device, clReImportSemaphoreSyncFdKHR); std::vector vkExternalSemaphoreHandleTypeList = @@ -448,11 +517,15 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2)) err = clEnqueueTask(queue, kernel, 0, nullptr, &task_events[0]); test_error(err, "Unable to enqueue task_1"); - // Signal semaphore (dependency on task_1) - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &task_events[0], - &signal_events[0]); - test_error(err, "Could not signal semaphore"); + if (vkExternalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + // Signal semaphore (dependency on task_1) + err = clEnqueueSignalSemaphoresKHR( + queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1, + &task_events[0], &signal_events[0]); + test_error(err, "Could not signal semaphore"); + } // In a loop size_t loop; @@ -473,11 +546,21 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2)) err = clWaitForEvents(1, &wait_events[loop - 1]); test_error(err, "Unable to wait for wait semaphore to complete"); - // Signal semaphore (dependency on task_loop) - err = clEnqueueSignalSemaphoresKHR( - queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1, - &task_events[loop], &signal_events[loop]); - test_error(err, "Could not signal semaphore"); + if (vkExternalSemaphoreHandleType + == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + err = clReImportSemaphoreSyncFdKHR(sema_ext.getCLSemaphore(), + nullptr, -1); + test_error(err, "Could not reimport semaphore sync fd"); + } + else + { + // Signal semaphore (dependency on task_loop) + err = clEnqueueSignalSemaphoresKHR( + queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1, + &task_events[loop], &signal_events[loop]); + test_error(err, "Could not signal semaphore"); + } } // Wait semaphore @@ -494,7 +577,11 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2)) for (loop = 0; loop < loop_count; ++loop) { test_assert_event_complete(wait_events[loop]); - test_assert_event_complete(signal_events[loop]); + if (vkExternalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + test_assert_event_complete(signal_events[loop]); + } test_assert_event_complete(task_events[loop]); } } @@ -536,6 +623,19 @@ static int external_semaphore_cross_queue_helper(cl_device_id device, for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType : vkExternalSemaphoreHandleTypeList) { + if (vkExternalSemaphoreHandleType + == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + std::stringstream log_message; + log_message + << "Skipping semaphore type: \"" + << vkExternalSemaphoreHandleType + << "\"; it cannot be signaled from OpenCL when imported." + << std::endl; + log_info("%s", log_message.str().c_str()); + continue; + } + log_info_semaphore_type(vkExternalSemaphoreHandleType); VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); @@ -668,10 +768,14 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2)) // Signal semaphore 1 clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR( - queue1, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr, - &signal_1_event); - test_error(err, "Could not signal semaphore"); + if (vkExternalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + err = clEnqueueSignalSemaphoresKHR( + queue1, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr, + &signal_1_event); + test_error(err, "Could not signal semaphore"); + } // Wait semaphore 1 clEventWrapper wait_1_event; @@ -682,10 +786,14 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2)) // Signal semaphore 2 clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR( - queue2, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr, - &signal_2_event); - test_error(err, "Could not signal semaphore"); + if (vkExternalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + err = clEnqueueSignalSemaphoresKHR( + queue2, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr, + &signal_2_event); + test_error(err, "Could not signal semaphore"); + } // Wait semaphore 2 clEventWrapper wait_2_event; @@ -702,8 +810,12 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2)) test_error(err, "Could not finish queue"); // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); + if (vkExternalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + test_assert_event_complete(signal_1_event); + test_assert_event_complete(signal_2_event); + } test_assert_event_complete(wait_1_event); test_assert_event_complete(wait_2_event); } @@ -741,6 +853,19 @@ REGISTER_TEST_VERSION(external_semaphores_multi_signal, Version(1, 2)) for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType : vkExternalSemaphoreHandleTypeList) { + if (vkExternalSemaphoreHandleType + == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + std::stringstream log_message; + log_message + << "Skipping semaphore type: \"" + << vkExternalSemaphoreHandleType + << "\"; it cannot be signaled from OpenCL when imported." + << std::endl; + log_info("%s", log_message.str().c_str()); + continue; + } + log_info_semaphore_type(vkExternalSemaphoreHandleType); VulkanSemaphore vkVk2CLSemaphore1(vkDevice, vkExternalSemaphoreHandleType); @@ -842,19 +967,23 @@ REGISTER_TEST_VERSION(external_semaphores_multi_wait, Version(1, 2)) context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); test_error(err, "Could not create command queue"); - // Signal semaphore 1 clEventWrapper signal_1_event; - err = - clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(), - nullptr, 0, nullptr, &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore 2 clEventWrapper signal_2_event; - err = - clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_2.getCLSemaphore(), - nullptr, 0, nullptr, &signal_2_event); - test_error(err, "Could not signal semaphore"); + if (vkExternalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + // Signal semaphore 1 + err = clEnqueueSignalSemaphoresKHR( + queue, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr, + &signal_1_event); + test_error(err, "Could not signal semaphore"); + + // Signal semaphore 2 + err = clEnqueueSignalSemaphoresKHR( + queue, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr, + &signal_2_event); + test_error(err, "Could not signal semaphore"); + } // Wait semaphore 1 and 2 clEventWrapper wait_event; @@ -869,8 +998,12 @@ REGISTER_TEST_VERSION(external_semaphores_multi_wait, Version(1, 2)) test_error(err, "Could not finish queue"); // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); + if (vkExternalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD) + { + test_assert_event_complete(signal_1_event); + test_assert_event_complete(signal_2_event); + } test_assert_event_complete(wait_event); } diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp deleted file mode 100644 index 2fcf4f3f..00000000 --- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp +++ /dev/null @@ -1,131 +0,0 @@ -// -// Copyright (c) 2024 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "harness/typeWrappers.h" -#include "harness/extensionHelpers.h" -#include "harness/errorHelpers.h" - -// Test it is possible to export a semaphore to a sync fd and import the same -// sync fd to a new semaphore -REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2)) -{ - cl_int err = CL_SUCCESS; - - if (!is_extension_available(device, "cl_khr_external_semaphore")) - { - log_info( - "cl_khr_external_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (!is_extension_available(device, "cl_khr_external_semaphore_sync_fd")) - { - log_info("cl_khr_external_semaphore_sync_fd is not supported on this " - "platoform. Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - cl_command_queue_properties device_props = 0; - err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, - sizeof(device_props), &device_props, NULL); - test_error(err, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed"); - - if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0) - { - log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not " - "supported. Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - GET_PFN(device, clGetSemaphoreHandleForTypeKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - - // Create ooo queue - clCommandQueueWrapper test_queue = clCreateCommandQueue( - context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphore - cl_semaphore_properties_khr sema_1_props[] = { - static_cast(CL_SEMAPHORE_TYPE_KHR), - static_cast(CL_SEMAPHORE_TYPE_BINARY_KHR), - static_cast( - CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), - static_cast( - CL_SEMAPHORE_HANDLE_SYNC_FD_KHR), - static_cast( - CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR), - 0 - }; - cl_semaphore_khr sema_1 = - clCreateSemaphoreWithPropertiesKHR(context, sema_1_props, &err); - test_error(err, "Could not create semaphore"); - - // Signal semaphore - clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(test_queue, 1, &sema_1, nullptr, 0, - nullptr, &signal_event); - test_error(err, "Could not signal semaphore"); - - // Extract sync fd - int handle = -1; - size_t handle_size; - err = clGetSemaphoreHandleForTypeKHR(sema_1, device, - CL_SEMAPHORE_HANDLE_SYNC_FD_KHR, - sizeof(handle), &handle, &handle_size); - test_error(err, "Could not extract semaphore handle"); - test_assert_error(sizeof(handle) == handle_size, "Invalid handle size"); - test_assert_error(handle >= 0, "Invalid handle"); - - // Create semaphore from sync fd - cl_semaphore_properties_khr sema_2_props[] = { - static_cast(CL_SEMAPHORE_TYPE_KHR), - static_cast(CL_SEMAPHORE_TYPE_BINARY_KHR), - CL_SEMAPHORE_HANDLE_SYNC_FD_KHR, - static_cast(handle), 0 - }; - - cl_semaphore_khr sema_2 = - clCreateSemaphoreWithPropertiesKHR(context, sema_2_props, &err); - test_error(err, "Could not create semaphore"); - - // Wait semaphore - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(test_queue, 1, &sema_2, nullptr, 0, - nullptr, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Finish - err = clFinish(test_queue); - test_error(err, "Could not finish queue"); - - // Check all events are completed - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - // Release semaphore - err = clReleaseSemaphoreKHR(sema_1); - test_error(err, "Could not release semaphore"); - - err = clReleaseSemaphoreKHR(sema_2); - test_error(err, "Could not release semaphore"); - return TEST_PASS; -} diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp index 85c8fc7f..8a0de351 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp @@ -17,6 +17,7 @@ int main(int argc, const char *argv[]) { - return runTestHarness(argc, argv, test_registry::getInstance().num_tests(), - test_registry::getInstance().definitions(), false, 0); + return runTestHarness( + argc, argv, static_cast(test_registry::getInstance().num_tests()), + test_registry::getInstance().definitions(), false, 0); } \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h index f8ccb570..31488d9d 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h +++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h @@ -21,23 +21,118 @@ #include "harness/errorHelpers.h" #include "directx_wrapper.hpp" -class CLDXSemaphoreWrapper { -public: - CLDXSemaphoreWrapper(cl_device_id device, cl_context context, - ID3D12Device* dx_device) - : device(device), context(context), dx_device(dx_device){}; - - int createSemaphoreFromFence(ID3D12Fence* fence) +struct DXFenceTestBase +{ + DXFenceTestBase(cl_device_id device, cl_context context, + cl_command_queue queue, cl_int num_elems) + : device(device), context(context), queue(queue), num_elems(num_elems) + {} + virtual ~DXFenceTestBase() { - cl_int errcode = CL_SUCCESS; + if (fence_handle) + { + CloseHandle(fence_handle); + fence_handle = nullptr; + } + if (fence_wrapper) + { + delete fence_wrapper; + fence_wrapper = nullptr; + } + if (semaphore) + { + clReleaseSemaphoreKHR(semaphore); + semaphore = nullptr; + } + }; - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); + virtual int SetUp() + { + REQUIRE_EXTENSION("cl_khr_external_semaphore"); + REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - const HRESULT hr = dx_device->CreateSharedHandle( - fence, nullptr, GENERIC_ALL, nullptr, &fence_handle); - test_error(FAILED(hr), "Failed to get shared handle from D3D12 fence"); + // Obtain pointers to semaphore's API + GET_FUNCTION_EXTENSION_ADDRESS(device, + clCreateSemaphoreWithPropertiesKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clReleaseSemaphoreKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueSignalSemaphoresKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueWaitSemaphoresKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreHandleForTypeKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clRetainSemaphoreKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreInfoKHR); - cl_semaphore_properties_khr sem_props[] = { + test_error( + !is_import_handle_available(CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), + "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the " + "supported import types"); + + // Import D3D12 fence into OpenCL + fence_wrapper = new DirectXFenceWrapper(dx_wrapper.getDXDevice()); + semaphore = createSemaphoreFromFence(fence_wrapper->get()); + test_assert_error(!!semaphore, "Could not create semaphore"); + + return TEST_PASS; + } + + virtual cl_int Run() = 0; + +protected: + int errcode = CL_SUCCESS; + + cl_device_id device = nullptr; + cl_context context = nullptr; + cl_command_queue queue = nullptr; + cl_int num_elems = 0; + DirectXWrapper dx_wrapper; + + cl_semaphore_payload_khr semaphore_payload = 1; + cl_semaphore_khr semaphore = nullptr; + HANDLE fence_handle = nullptr; + DirectXFenceWrapper *fence_wrapper = nullptr; + + clCreateSemaphoreWithPropertiesKHR_fn clCreateSemaphoreWithPropertiesKHR = + nullptr; + clEnqueueSignalSemaphoresKHR_fn clEnqueueSignalSemaphoresKHR = nullptr; + clEnqueueWaitSemaphoresKHR_fn clEnqueueWaitSemaphoresKHR = nullptr; + clReleaseSemaphoreKHR_fn clReleaseSemaphoreKHR = nullptr; + clGetSemaphoreInfoKHR_fn clGetSemaphoreInfoKHR = nullptr; + clRetainSemaphoreKHR_fn clRetainSemaphoreKHR = nullptr; + clGetSemaphoreHandleForTypeKHR_fn clGetSemaphoreHandleForTypeKHR = nullptr; + + [[nodiscard]] bool is_import_handle_available( + const cl_external_memory_handle_type_khr handle_type) + { + size_t import_types_size = 0; + errcode = + clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, + 0, nullptr, &import_types_size); + if (errcode != CL_SUCCESS) + { + log_error("Could not query import semaphore handle types"); + return false; + } + std::vector import_types( + import_types_size / sizeof(cl_external_semaphore_handle_type_khr)); + errcode = + clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, + import_types_size, import_types.data(), nullptr); + if (errcode != CL_SUCCESS) + { + log_error("Could not query import semaphore handle types"); + return false; + } + + return std::find(import_types.begin(), import_types.end(), handle_type) + != import_types.end(); + } + + cl_semaphore_khr createSemaphoreFromFence(ID3D12Fence *src_fence) + { + const HRESULT hr = dx_wrapper.getDXDevice()->CreateSharedHandle( + src_fence, nullptr, GENERIC_ALL, nullptr, &fence_handle); + if (FAILED(hr)) return nullptr; + + const cl_semaphore_properties_khr sem_props[] = { static_cast(CL_SEMAPHORE_TYPE_KHR), static_cast( CL_SEMAPHORE_TYPE_BINARY_KHR), @@ -45,73 +140,30 @@ public: CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), reinterpret_cast(fence_handle), 0 }; - semaphore = + cl_semaphore_khr tmp_semaphore = clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode); - test_error(errcode, "Could not create semaphore"); + if (errcode != CL_SUCCESS) return nullptr; - return CL_SUCCESS; - } - - ~CLDXSemaphoreWrapper() - { - releaseSemaphore(); - if (fence_handle) - { - CloseHandle(fence_handle); - } - }; - - const cl_semaphore_khr* operator&() const { return &semaphore; }; - cl_semaphore_khr operator*() const { return semaphore; }; - - HANDLE getHandle() const { return fence_handle; }; - -private: - cl_semaphore_khr semaphore; - ComPtr fence; - HANDLE fence_handle; - cl_device_id device; - cl_context context; - ComPtr dx_device; - - int releaseSemaphore() const - { - GET_PFN(device, clReleaseSemaphoreKHR); - - if (semaphore) - { - clReleaseSemaphoreKHR(semaphore); - } - - return CL_SUCCESS; + return tmp_semaphore; } }; -static bool -is_import_handle_available(cl_device_id device, - const cl_external_memory_handle_type_khr handle_type) +template +int MakeAndRunTest(cl_device_id device, cl_context context, + cl_command_queue queue, cl_int nelems) { - int errcode = CL_SUCCESS; - size_t import_types_size = 0; - errcode = - clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, 0, - nullptr, &import_types_size); - if (errcode != CL_SUCCESS) + cl_int status = TEST_PASS; + try { - log_error("Could not query import semaphore handle types"); - return false; - } - std::vector import_types( - import_types_size / sizeof(cl_external_semaphore_handle_type_khr)); - errcode = - clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, - import_types_size, import_types.data(), nullptr); - if (errcode != CL_SUCCESS) + auto test_fixture = T(device, context, queue, nelems); + status = test_fixture.SetUp(); + if (status != TEST_PASS) return status; + status = test_fixture.Run(); + } catch (const std::runtime_error &e) { - log_error("Could not query import semaphore handle types"); - return false; + log_error("%s", e.what()); + return TEST_FAIL; } - return std::find(import_types.begin(), import_types.end(), handle_type) - != import_types.end(); + return status; } \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp index 569fb204..db303bf6 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp @@ -16,309 +16,279 @@ #include "semaphore_dx_fence_base.h" +struct SignalWait final : DXFenceTestBase +{ + using DXFenceTestBase::DXFenceTestBase; + + cl_int Run() override + { + log_info("Calling clEnqueueSignalSemaphoresKHR\n"); + clEventWrapper signal_event; + errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, + &semaphore_payload, 0, nullptr, + &signal_event); + test_error(errcode, "Failed to signal semaphore"); + + log_info("Calling clEnqueueWaitSemaphoresKHR\n"); + clEventWrapper wait_event; + errcode = clEnqueueWaitSemaphoresKHR( + queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); + test_error(errcode, "Failed to wait semaphore"); + + errcode = clFinish(queue); + test_error(errcode, "Could not finish queue"); + + // Verify that the events completed. + test_assert_event_complete(signal_event); + test_assert_event_complete(wait_event); + + return TEST_PASS; + } +}; + // Confirm that a signal followed by a wait in OpenCL will complete successfully REGISTER_TEST(test_external_semaphores_signal_wait) { - int errcode = CL_SUCCESS; - const DirectXWrapper dx_wrapper; - - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - - test_error(!is_import_handle_available(device, - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), - "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the " - "supported import types"); - - // Import D3D12 fence into OpenCL - const DirectXFenceWrapper fence(dx_wrapper.getDXDevice()); - CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice()); - test_error(semaphore.createSemaphoreFromFence(*fence), - "Could not create semaphore"); - - log_info("Calling clEnqueueSignalSemaphoresKHR\n"); - constexpr cl_semaphore_payload_khr semaphore_payload = 1; - clEventWrapper signal_event; - errcode = clEnqueueSignalSemaphoresKHR( - queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event); - test_error(errcode, "Failed to signal semaphore"); - - log_info("Calling clEnqueueWaitSemaphoresKHR\n"); - clEventWrapper wait_event; - errcode = clEnqueueWaitSemaphoresKHR( - queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); - test_error(errcode, "Failed to wait semaphore"); - - errcode = clFinish(queue); - test_error(errcode, "Could not finish queue"); - - // Verify that the events completed. - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - return TEST_PASS; + return MakeAndRunTest(device, context, queue, num_elements); } +struct SignalDXCPU final : DXFenceTestBase +{ + using DXFenceTestBase::DXFenceTestBase; + + cl_int Run() override + { + log_info("Calling clEnqueueWaitSemaphoresKHR\n"); + clEventWrapper wait_event; + errcode = clEnqueueWaitSemaphoresKHR( + queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); + test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); + + log_info("Calling d3d12_fence->Signal()\n"); + const HRESULT hr = fence_wrapper->get()->Signal(semaphore_payload); + test_error(FAILED(hr), "Failed to signal D3D12 fence"); + + errcode = clFinish(queue); + test_error(errcode, "Could not finish queue"); + + test_assert_event_complete(wait_event); + + return TEST_PASS; + } +}; + // Confirm that a wait in OpenCL followed by a CPU signal in DX12 will complete // successfully REGISTER_TEST(test_external_semaphores_signal_dx_cpu) { - int errcode = CL_SUCCESS; - const DirectXWrapper dx_wrapper; - - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - - test_error(!is_import_handle_available(device, - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), - "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the " - "supported import types"); - - // Import D3D12 fence into OpenCL - const DirectXFenceWrapper fence(dx_wrapper.getDXDevice()); - CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice()); - test_error(semaphore.createSemaphoreFromFence(*fence), - "Could not create semaphore"); - - log_info("Calling clEnqueueWaitSemaphoresKHR\n"); - constexpr cl_semaphore_payload_khr semaphore_payload = 1; - clEventWrapper wait_event; - errcode = clEnqueueWaitSemaphoresKHR( - queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); - test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); - - log_info("Calling d3d12_fence->Signal()\n"); - const HRESULT hr = (*fence)->Signal(semaphore_payload); - test_error(FAILED(hr), "Failed to signal D3D12 fence"); - - errcode = clFinish(queue); - test_error(errcode, "Could not finish queue"); - - test_assert_event_complete(wait_event); - - return TEST_PASS; + return MakeAndRunTest(device, context, queue, num_elements); } +struct SignalDXGPU final : DXFenceTestBase +{ + using DXFenceTestBase::DXFenceTestBase; + + cl_int Run() override + { + log_info("Calling clEnqueueWaitSemaphoresKHR\n"); + clEventWrapper wait_event; + errcode = clEnqueueWaitSemaphoresKHR( + queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); + test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); + + log_info("Calling d3d12_command_queue->Signal()\n"); + const HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal( + fence_wrapper->get(), semaphore_payload); + test_error(FAILED(hr), "Failed to signal D3D12 fence"); + + errcode = clFinish(queue); + test_error(errcode, "Could not finish queue"); + + test_assert_event_complete(wait_event); + + return TEST_PASS; + } +}; + // Confirm that a wait in OpenCL followed by a GPU signal in DX12 will complete // successfully REGISTER_TEST(test_external_semaphores_signal_dx_gpu) { - int errcode = CL_SUCCESS; - const DirectXWrapper dx_wrapper; - - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - - test_error(!is_import_handle_available(device, - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), - "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the " - "supported import types"); - - // Import D3D12 fence into OpenCL - const DirectXFenceWrapper fence(dx_wrapper.getDXDevice()); - CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice()); - test_error(semaphore.createSemaphoreFromFence(*fence), - "Could not create semaphore"); - - log_info("Calling clEnqueueWaitSemaphoresKHR\n"); - constexpr cl_semaphore_payload_khr semaphore_payload = 1; - clEventWrapper wait_event; - errcode = clEnqueueWaitSemaphoresKHR( - queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); - test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); - - log_info("Calling d3d12_command_queue->Signal()\n"); - const HRESULT hr = - dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload); - test_error(FAILED(hr), "Failed to signal D3D12 fence"); - - errcode = clFinish(queue); - test_error(errcode, "Could not finish queue"); - - test_assert_event_complete(wait_event); - - return TEST_PASS; + return MakeAndRunTest(device, context, queue, num_elements); } +struct CLDXInterlock final : DXFenceTestBase +{ + using DXFenceTestBase::DXFenceTestBase; + + cl_int Run() override + { + log_info("Calling d3d12_command_queue->Wait(1)\n"); + HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper->get(), + semaphore_payload); + test_error(FAILED(hr), "Failed to wait on D3D12 fence"); + + log_info("Calling d3d12_command_queue->Signal(2)\n"); + hr = dx_wrapper.getDXCommandQueue()->Signal(fence_wrapper->get(), + semaphore_payload + 1); + test_error(FAILED(hr), "Failed to signal D3D12 fence"); + + log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n"); + clEventWrapper signal_event; + errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, + &semaphore_payload, 0, nullptr, + &signal_event); + test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR"); + + log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n"); + semaphore_payload += 1; + clEventWrapper wait_event; + errcode = clEnqueueWaitSemaphoresKHR( + queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); + test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); + + errcode = clFinish(queue); + test_error(errcode, "Could not finish queue"); + + test_assert_event_complete(wait_event); + test_assert_event_complete(signal_event); + + return TEST_PASS; + } +}; + // Confirm that interlocking waits between OpenCL and DX12 will complete // successfully REGISTER_TEST(test_external_semaphores_cl_dx_interlock) { - int errcode = CL_SUCCESS; - const DirectXWrapper dx_wrapper; - - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - - test_error(!is_import_handle_available(device, - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), - "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the " - "supported import types"); - - // Import D3D12 fence into OpenCL - const DirectXFenceWrapper fence(dx_wrapper.getDXDevice()); - CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice()); - test_error(semaphore.createSemaphoreFromFence(*fence), - "Could not create semaphore"); - - log_info("Calling d3d12_command_queue->Wait(1)\n"); - cl_semaphore_payload_khr semaphore_payload = 1; - HRESULT hr = - dx_wrapper.getDXCommandQueue()->Wait(*fence, semaphore_payload); - test_error(FAILED(hr), "Failed to wait on D3D12 fence"); - - log_info("Calling d3d12_command_queue->Signal(2)\n"); - hr = dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload + 1); - test_error(FAILED(hr), "Failed to signal D3D12 fence"); - - log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n"); - clEventWrapper signal_event; - errcode = clEnqueueSignalSemaphoresKHR( - queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event); - test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR"); - - log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n"); - semaphore_payload += 1; - clEventWrapper wait_event; - errcode = clEnqueueWaitSemaphoresKHR( - queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); - test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); - - errcode = clFinish(queue); - test_error(errcode, "Could not finish queue"); - - test_assert_event_complete(wait_event); - test_assert_event_complete(signal_event); - - return TEST_PASS; + return MakeAndRunTest(device, context, queue, num_elements); } +struct MultipleWaitSignal final : DXFenceTestBase +{ + using DXFenceTestBase::DXFenceTestBase; + + ~MultipleWaitSignal() override + { + if (fence_handle_2) + { + CloseHandle(fence_handle_2); + fence_handle_2 = nullptr; + } + if (fence_wrapper_2) + { + delete fence_wrapper_2; + fence_wrapper_2 = nullptr; + } + if (semaphore_2) + { + clReleaseSemaphoreKHR(semaphore_2); + semaphore_2 = nullptr; + } + DXFenceTestBase::~DXFenceTestBase(); + }; + + int SetUp() override + { + DXFenceTestBase::SetUp(); + fence_wrapper_2 = new DirectXFenceWrapper(dx_wrapper.getDXDevice()); + semaphore_2 = createSemaphoreFromFence(fence_wrapper_2->get()); + test_assert_error(!!semaphore_2, "Could not create semaphore"); + + return TEST_PASS; + } + + cl_int Run() override + { + const cl_semaphore_khr semaphore_list[] = { semaphore, semaphore_2 }; + cl_semaphore_payload_khr semaphore_payload_list[] = { + semaphore_payload, semaphore_payload + 1 + }; + + log_info("Calling clEnqueueWaitSemaphoresKHR\n"); + clEventWrapper wait_event; + errcode = clEnqueueWaitSemaphoresKHR(queue, 2, semaphore_list, + semaphore_payload_list, 0, nullptr, + &wait_event); + test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); + + log_info("Calling d3d12_command_queue->Signal()\n"); + HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal( + fence_wrapper_2->get(), semaphore_payload + 1); + test_error(FAILED(hr), "Failed to signal D3D12 fence 2"); + hr = dx_wrapper.getDXCommandQueue()->Signal(fence_wrapper->get(), + semaphore_payload); + test_error(FAILED(hr), "Failed to signal D3D12 fence 1"); + + log_info( + "Calling d3d12_command_queue->Wait() with different payloads\n"); + hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper->get(), + semaphore_payload + 3); + test_error(FAILED(hr), "Failed to wait on D3D12 fence 1"); + hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper_2->get(), + semaphore_payload + 2); + test_error(FAILED(hr), "Failed to wait on D3D12 fence 2"); + + errcode = clFinish(queue); + test_error(errcode, "Could not finish queue"); + + test_assert_event_complete(wait_event); + + semaphore_payload_list[0] = semaphore_payload + 3; + semaphore_payload_list[1] = semaphore_payload + 2; + + log_info("Calling clEnqueueSignalSemaphoresKHR\n"); + clEventWrapper signal_event; + errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list, + semaphore_payload_list, 0, + nullptr, &signal_event); + test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR"); + + // Wait until the GPU has completed commands up to this fence point. + log_info("Waiting for D3D12 command queue completion\n"); + if (fence_wrapper->get()->GetCompletedValue() + < semaphore_payload_list[0]) + { + const HANDLE event_handle = + CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS); + hr = fence_wrapper->get()->SetEventOnCompletion( + semaphore_payload_list[0], event_handle); + test_error(FAILED(hr), + "Failed to set D3D12 fence 1 event on completion"); + WaitForSingleObject(event_handle, INFINITE); + CloseHandle(event_handle); + } + if (fence_wrapper_2->get()->GetCompletedValue() + < semaphore_payload_list[1]) + { + const HANDLE event_handle = + CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS); + hr = fence_wrapper_2->get()->SetEventOnCompletion( + semaphore_payload_list[1], event_handle); + test_error(FAILED(hr), + "Failed to set D3D12 fence 2 event on completion"); + WaitForSingleObject(event_handle, INFINITE); + CloseHandle(event_handle); + } + + errcode = clFinish(queue); + test_error(errcode, "Could not finish queue"); + + test_assert_event_complete(signal_event); + + return TEST_PASS; + } + +protected: + cl_semaphore_khr semaphore_2 = nullptr; + HANDLE fence_handle_2 = nullptr; + DirectXFenceWrapper *fence_wrapper_2 = nullptr; +}; + // Confirm that multiple waits in OpenCL followed by signals in DX12 and waits // in DX12 followed by signals in OpenCL complete successfully REGISTER_TEST(test_external_semaphores_multiple_wait_signal) { - int errcode = CL_SUCCESS; - const DirectXWrapper dx_wrapper; - - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - - test_error(!is_import_handle_available(device, - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), - "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the " - "supported import types"); - - // Import D3D12 fence into OpenCL - const DirectXFenceWrapper fence_1(dx_wrapper.getDXDevice()); - CLDXSemaphoreWrapper semaphore_1(device, context, dx_wrapper.getDXDevice()); - test_error(semaphore_1.createSemaphoreFromFence(*fence_1), - "Could not create semaphore"); - - const DirectXFenceWrapper fence_2(dx_wrapper.getDXDevice()); - CLDXSemaphoreWrapper semaphore_2(device, context, dx_wrapper.getDXDevice()); - test_error(semaphore_2.createSemaphoreFromFence(*fence_2), - "Could not create semaphore"); - - const cl_semaphore_khr semaphore_list[] = { *semaphore_1, *semaphore_2 }; - constexpr cl_semaphore_payload_khr semaphore_payload = 1; - cl_semaphore_payload_khr semaphore_payload_list[] = { - semaphore_payload, semaphore_payload + 1 - }; - - log_info("Calling clEnqueueWaitSemaphoresKHR\n"); - clEventWrapper wait_event; - errcode = clEnqueueWaitSemaphoresKHR(queue, 2, semaphore_list, - semaphore_payload_list, 0, nullptr, - &wait_event); - test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR"); - - log_info("Calling d3d12_command_queue->Signal()\n"); - HRESULT hr = - dx_wrapper.getDXCommandQueue()->Signal(*fence_2, semaphore_payload + 1); - test_error(FAILED(hr), "Failed to signal D3D12 fence 2"); - hr = dx_wrapper.getDXCommandQueue()->Signal(*fence_1, semaphore_payload); - test_error(FAILED(hr), "Failed to signal D3D12 fence 1"); - - log_info("Calling d3d12_command_queue->Wait() with different payloads\n"); - hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_1, semaphore_payload + 3); - test_error(FAILED(hr), "Failed to wait on D3D12 fence 1"); - hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_2, semaphore_payload + 2); - test_error(FAILED(hr), "Failed to wait on D3D12 fence 2"); - - errcode = clFinish(queue); - test_error(errcode, "Could not finish queue"); - - test_assert_event_complete(wait_event); - - semaphore_payload_list[0] = semaphore_payload + 3; - semaphore_payload_list[1] = semaphore_payload + 2; - - log_info("Calling clEnqueueSignalSemaphoresKHR\n"); - clEventWrapper signal_event; - errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list, - semaphore_payload_list, 0, nullptr, - &signal_event); - test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR"); - - // Wait until the GPU has completed commands up to this fence point. - log_info("Waiting for D3D12 command queue completion\n"); - if ((*fence_1)->GetCompletedValue() < semaphore_payload_list[0]) - { - const HANDLE event_handle = - CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS); - hr = (*fence_1)->SetEventOnCompletion(semaphore_payload_list[0], - event_handle); - test_error(FAILED(hr), - "Failed to set D3D12 fence 1 event on completion"); - WaitForSingleObject(event_handle, INFINITE); - CloseHandle(event_handle); - } - if ((*fence_2)->GetCompletedValue() < semaphore_payload_list[1]) - { - const HANDLE event_handle = - CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS); - hr = (*fence_2)->SetEventOnCompletion(semaphore_payload_list[1], - event_handle); - test_error(FAILED(hr), - "Failed to set D3D12 fence 2 event on completion"); - WaitForSingleObject(event_handle, INFINITE); - CloseHandle(event_handle); - } - - errcode = clFinish(queue); - test_error(errcode, "Could not finish queue"); - - test_assert_event_complete(signal_event); - - return TEST_PASS; + return MakeAndRunTest(device, context, queue, + num_elements); } \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp index c54cf61b..ce7db56f 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp @@ -16,205 +16,205 @@ #include "semaphore_dx_fence_base.h" +struct ExportDXSignal final : DXFenceTestBase +{ + using DXFenceTestBase::DXFenceTestBase; + + int Run() override + { + size_t export_types_size = 0; + errcode = + clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, + 0, nullptr, &export_types_size); + test_error(errcode, "Could not query export semaphore handle types"); + std::vector export_types( + export_types_size / sizeof(cl_external_semaphore_handle_type_khr)); + errcode = + clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, + export_types_size, export_types.data(), nullptr); + test_error(errcode, "Could not query export semaphore handle types"); + + if (std::find(export_types.begin(), export_types.end(), + CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR) + == export_types.end()) + { + log_info( + "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between " + "the supported export types\n"); + return TEST_FAIL; + } + + constexpr cl_semaphore_properties_khr sem_props[] = { + static_cast(CL_SEMAPHORE_TYPE_KHR), + static_cast( + CL_SEMAPHORE_TYPE_BINARY_KHR), + static_cast( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), + static_cast( + CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), + static_cast( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR), + 0 + }; + cl_semaphore_khr exportable_semaphore = + clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode); + test_error(errcode, "Could not create semaphore"); + + cl_bool is_exportable = CL_FALSE; + errcode = clGetSemaphoreInfoKHR( + exportable_semaphore, CL_SEMAPHORE_EXPORTABLE_KHR, + sizeof(is_exportable), &is_exportable, nullptr); + test_error(errcode, "Could not get semaphore info"); + test_error(!is_exportable, "Semaphore is not exportable"); + + log_info("Calling clEnqueueWaitSemaphoresKHR\n"); + clEventWrapper wait_event; + errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &exportable_semaphore, + &semaphore_payload, 0, nullptr, + &wait_event); + test_error(errcode, "Failed to wait semaphore"); + + HANDLE semaphore_handle = nullptr; + errcode = clGetSemaphoreHandleForTypeKHR( + exportable_semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR, + sizeof(semaphore_handle), &semaphore_handle, nullptr); + test_error(errcode, "Could not get semaphore handle"); + + ID3D12Fence *exported_fence = nullptr; + errcode = dx_wrapper.getDXDevice()->OpenSharedHandle( + semaphore_handle, IID_PPV_ARGS(&exported_fence)); + test_error(errcode, "Could not open semaphore handle"); + + log_info("Calling fence->Signal()\n"); + const HRESULT hr = exported_fence->Signal(semaphore_payload); + test_error(FAILED(hr), "Failed to signal D3D12 fence"); + + errcode = clFinish(queue); + test_error(errcode, "Could not finish queue"); + + test_assert_event_complete(wait_event); + + // Release resources + CloseHandle(semaphore_handle); + test_error(clReleaseSemaphoreKHR(exportable_semaphore), + "Could not release semaphore"); + exported_fence->Release(); + + return TEST_PASS; + } +}; + // Confirm that a wait followed by a signal in DirectX 12 using an exported // semaphore will complete successfully REGISTER_TEST(test_external_semaphores_export_dx_signal) { - int errcode = CL_SUCCESS; - const DirectXWrapper dx_wrapper; - - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - GET_PFN(device, clGetSemaphoreInfoKHR); - GET_PFN(device, clGetSemaphoreHandleForTypeKHR); - - size_t export_types_size = 0; - errcode = - clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0, - nullptr, &export_types_size); - test_error(errcode, "Could not query export semaphore handle types"); - std::vector export_types( - export_types_size / sizeof(cl_external_semaphore_handle_type_khr)); - errcode = - clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, - export_types_size, export_types.data(), nullptr); - test_error(errcode, "Could not query export semaphore handle types"); - - if (std::find(export_types.begin(), export_types.end(), - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR) - == export_types.end()) - { - log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between " - "the supported export types\n"); - return TEST_FAIL; - } - - constexpr cl_semaphore_properties_khr sem_props[] = { - static_cast(CL_SEMAPHORE_TYPE_KHR), - static_cast(CL_SEMAPHORE_TYPE_BINARY_KHR), - static_cast( - CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), - static_cast( - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), - static_cast( - CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR), - 0 - }; - cl_semaphore_khr semaphore = - clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode); - test_error(errcode, "Could not create semaphore"); - - cl_bool is_exportable = CL_FALSE; - errcode = - clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR, - sizeof(is_exportable), &is_exportable, nullptr); - test_error(errcode, "Could not get semaphore info"); - test_error(!is_exportable, "Semaphore is not exportable"); - - log_info("Calling clEnqueueWaitSemaphoresKHR\n"); - constexpr cl_semaphore_payload_khr semaphore_payload = 1; - clEventWrapper wait_event; - errcode = clEnqueueWaitSemaphoresKHR( - queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event); - test_error(errcode, "Failed to wait semaphore"); - - HANDLE semaphore_handle = nullptr; - errcode = clGetSemaphoreHandleForTypeKHR( - semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR, - sizeof(semaphore_handle), &semaphore_handle, nullptr); - test_error(errcode, "Could not get semaphore handle"); - - ID3D12Fence *fence = nullptr; - errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle, - IID_PPV_ARGS(&fence)); - test_error(errcode, "Could not open semaphore handle"); - - log_info("Calling fence->Signal()\n"); - const HRESULT hr = fence->Signal(semaphore_payload); - test_error(FAILED(hr), "Failed to signal D3D12 fence"); - - errcode = clFinish(queue); - test_error(errcode, "Could not finish queue"); - - test_assert_event_complete(wait_event); - - // Release resources - CloseHandle(semaphore_handle); - test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore"); - fence->Release(); - - return TEST_PASS; + return MakeAndRunTest(device, context, queue, num_elements); } +struct ExportDXWait final : DXFenceTestBase +{ + using DXFenceTestBase::DXFenceTestBase; + + int Run() override + { + size_t export_types_size = 0; + errcode = + clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, + 0, nullptr, &export_types_size); + test_error(errcode, "Could not query export semaphore handle types"); + std::vector export_types( + export_types_size / sizeof(cl_external_semaphore_handle_type_khr)); + errcode = + clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, + export_types_size, export_types.data(), nullptr); + test_error(errcode, "Could not query export semaphore handle types"); + + if (std::find(export_types.begin(), export_types.end(), + CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR) + == export_types.end()) + { + log_info( + "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between " + "the supported export types\n"); + return TEST_FAIL; + } + + constexpr cl_semaphore_properties_khr sem_props[] = { + static_cast(CL_SEMAPHORE_TYPE_KHR), + static_cast( + CL_SEMAPHORE_TYPE_BINARY_KHR), + static_cast( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), + static_cast( + CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), + static_cast( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR), + 0 + }; + cl_semaphore_khr exportable_semaphore = + clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode); + test_error(errcode, "Could not create semaphore"); + + cl_bool is_exportable = CL_FALSE; + errcode = clGetSemaphoreInfoKHR( + exportable_semaphore, CL_SEMAPHORE_EXPORTABLE_KHR, + sizeof(is_exportable), &is_exportable, nullptr); + test_error(errcode, "Could not get semaphore info"); + test_error(!is_exportable, "Semaphore is not exportable"); + + log_info("Calling clEnqueueSignalSemaphoresKHR\n"); + constexpr cl_semaphore_payload_khr semaphore_payload = 1; + clEventWrapper signal_event; + errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &exportable_semaphore, + &semaphore_payload, 0, nullptr, + &signal_event); + test_error(errcode, "Failed to signal semaphore"); + + HANDLE semaphore_handle = nullptr; + errcode = clGetSemaphoreHandleForTypeKHR( + exportable_semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR, + sizeof(semaphore_handle), &semaphore_handle, nullptr); + test_error(errcode, "Could not get semaphore handle"); + + ID3D12Fence *exported_fence = nullptr; + errcode = dx_wrapper.getDXDevice()->OpenSharedHandle( + semaphore_handle, IID_PPV_ARGS(&exported_fence)); + test_error(errcode, "Could not open semaphore handle"); + + log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n"); + HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(exported_fence, + semaphore_payload); + test_error(FAILED(hr), "Failed to wait on D3D12 fence"); + + log_info("Calling WaitForSingleObject\n"); + if (exported_fence->GetCompletedValue() < semaphore_payload) + { + const HANDLE event = + CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS); + hr = exported_fence->SetEventOnCompletion(semaphore_payload, event); + test_error(FAILED(hr), "Failed to set event on completion"); + WaitForSingleObject(event, INFINITE); + CloseHandle(event); + } + + errcode = clFinish(queue); + test_error(errcode, "Could not finish queue"); + + test_assert_event_complete(signal_event); + + // Release resources + CloseHandle(semaphore_handle); + test_error(clReleaseSemaphoreKHR(exportable_semaphore), + "Could not release semaphore"); + exported_fence->Release(); + + return TEST_PASS; + } +}; + // Confirm that a signal in OpenCL followed by a wait in DirectX 12 using an // exported semaphore will complete successfully REGISTER_TEST(test_external_semaphores_export_dx_wait) { - int errcode = CL_SUCCESS; - const DirectXWrapper dx_wrapper; - - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - GET_PFN(device, clGetSemaphoreInfoKHR); - GET_PFN(device, clGetSemaphoreHandleForTypeKHR); - - size_t export_types_size = 0; - errcode = - clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0, - nullptr, &export_types_size); - test_error(errcode, "Could not query export semaphore handle types"); - std::vector export_types( - export_types_size / sizeof(cl_external_semaphore_handle_type_khr)); - errcode = - clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, - export_types_size, export_types.data(), nullptr); - test_error(errcode, "Could not query export semaphore handle types"); - - if (std::find(export_types.begin(), export_types.end(), - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR) - == export_types.end()) - { - log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between " - "the supported export types\n"); - return TEST_FAIL; - } - - constexpr cl_semaphore_properties_khr sem_props[] = { - static_cast(CL_SEMAPHORE_TYPE_KHR), - static_cast(CL_SEMAPHORE_TYPE_BINARY_KHR), - static_cast( - CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), - static_cast( - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), - static_cast( - CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR), - 0 - }; - cl_semaphore_khr semaphore = - clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode); - test_error(errcode, "Could not create semaphore"); - - cl_bool is_exportable = CL_FALSE; - errcode = - clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR, - sizeof(is_exportable), &is_exportable, nullptr); - test_error(errcode, "Could not get semaphore info"); - test_error(!is_exportable, "Semaphore is not exportable"); - - log_info("Calling clEnqueueSignalSemaphoresKHR\n"); - constexpr cl_semaphore_payload_khr semaphore_payload = 1; - clEventWrapper signal_event; - errcode = clEnqueueSignalSemaphoresKHR( - queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event); - test_error(errcode, "Failed to signal semaphore"); - - HANDLE semaphore_handle = nullptr; - errcode = clGetSemaphoreHandleForTypeKHR( - semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR, - sizeof(semaphore_handle), &semaphore_handle, nullptr); - test_error(errcode, "Could not get semaphore handle"); - - ID3D12Fence *fence = nullptr; - errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle, - IID_PPV_ARGS(&fence)); - test_error(errcode, "Could not open semaphore handle"); - - log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n"); - HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence, semaphore_payload); - test_error(FAILED(hr), "Failed to wait on D3D12 fence"); - - log_info("Calling WaitForSingleObject\n"); - if (fence->GetCompletedValue() < semaphore_payload) - { - const HANDLE event = - CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS); - hr = fence->SetEventOnCompletion(semaphore_payload, event); - test_error(FAILED(hr), "Failed to set event on completion"); - WaitForSingleObject(event, INFINITE); - CloseHandle(event); - } - - errcode = clFinish(queue); - test_error(errcode, "Could not finish queue"); - - test_assert_event_complete(signal_event); - - // Release resources - CloseHandle(semaphore_handle); - test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore"); - fence->Release(); - - return TEST_PASS; + return MakeAndRunTest(device, context, queue, num_elements); } \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp index 6c032c56..398eb2b1 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp @@ -16,74 +16,52 @@ #include "semaphore_dx_fence_base.h" +struct DXFenceNegativeWait final : DXFenceTestBase +{ + using DXFenceTestBase::DXFenceTestBase; + + int Run() override + { + log_info("Calling clEnqueueWaitSemaphoresKHR\n"); + errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0, + nullptr, nullptr); + test_assert_error( + errcode == CL_INVALID_VALUE, + "Unexpected error code returned from clEnqueueWaitSemaphores"); + + return TEST_PASS; + } +}; + // Confirm that a wait without a semaphore payload list will return // CL_INVALID_VALUE REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait) { - int errcode = CL_SUCCESS; - const DirectXWrapper dx_wrapper; - - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - - test_error(!is_import_handle_available(device, - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), - "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the " - "supported import types"); - - // Import D3D12 fence into OpenCL - const DirectXFenceWrapper fence(dx_wrapper.getDXDevice()); - CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice()); - test_error(semaphore.createSemaphoreFromFence(*fence), - "Could not create semaphore"); - - log_info("Calling clEnqueueWaitSemaphoresKHR\n"); - errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0, - nullptr, nullptr); - test_assert_error( - errcode == CL_INVALID_VALUE, - "Unexpected error code returned from clEnqueueWaitSemaphores"); - - return TEST_PASS; + return MakeAndRunTest(device, context, queue, + num_elements); } +struct DXFenceNegativeSignal final : DXFenceTestBase +{ + using DXFenceTestBase::DXFenceTestBase; + + int Run() override + { + log_info("Calling clEnqueueWaitSemaphoresKHR\n"); + errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0, + nullptr, nullptr); + test_assert_error( + errcode == CL_INVALID_VALUE, + "Unexpected error code returned from clEnqueueSignalSemaphores"); + + return TEST_PASS; + } +}; + // Confirm that a signal without a semaphore payload list will return // CL_INVALID_VALUE REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal) { - int errcode = CL_SUCCESS; - const DirectXWrapper dx_wrapper; - - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - - test_error(!is_import_handle_available(device, - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), - "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the " - "supported import types"); - - // Import D3D12 fence into OpenCL - const DirectXFenceWrapper fence(dx_wrapper.getDXDevice()); - CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice()); - test_error(semaphore.createSemaphoreFromFence(*fence), - "Could not create semaphore"); - - log_info("Calling clEnqueueWaitSemaphoresKHR\n"); - errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0, - nullptr, nullptr); - test_assert_error( - errcode == CL_INVALID_VALUE, - "Unexpected error code returned from clEnqueueSignalSemaphores"); - - return TEST_PASS; + return MakeAndRunTest(device, context, queue, + num_elements); } \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp index 03aa2b15..87cb0caa 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp @@ -16,54 +16,43 @@ #include "semaphore_dx_fence_base.h" +struct DXFenceQueryProperties final : DXFenceTestBase +{ + using DXFenceTestBase::DXFenceTestBase; + + int Run() override + { + size_t properties_size_bytes = 0; + errcode = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR, + 0, nullptr, &properties_size_bytes); + test_error(errcode, "Could not get semaphore info"); + std::vector semaphore_properties( + properties_size_bytes / sizeof(cl_semaphore_properties_khr)); + errcode = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR, + properties_size_bytes, + semaphore_properties.data(), nullptr); + test_error(errcode, "Could not get semaphore info"); + + for (unsigned i = 0; i < semaphore_properties.size() - 1; i++) + { + if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR + && semaphore_properties[i + 1] + == reinterpret_cast( + fence_handle)) + { + return TEST_PASS; + } + } + log_error("Failed to find the dx fence handle type in the semaphore " + "properties"); + return TEST_FAIL; + } +}; + // Confirm that the CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR property is in the // properties returned by clGetSemaphoreInfo REGISTER_TEST(test_external_semaphores_dx_fence_query_properties) { - int errcode = CL_SUCCESS; - const DirectXWrapper dx_wrapper; - - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence"); - - // Obtain pointers to semaphore's API - GET_PFN(device, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(device, clReleaseSemaphoreKHR); - GET_PFN(device, clGetSemaphoreInfoKHR); - - test_error(!is_import_handle_available(device, - CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR), - "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the " - "supported import types"); - - // Import D3D12 fence into OpenCL - const DirectXFenceWrapper fence(dx_wrapper.getDXDevice()); - CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice()); - test_error(semaphore.createSemaphoreFromFence(*fence), - "Could not create semaphore"); - - size_t properties_size_bytes = 0; - errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR, 0, - nullptr, &properties_size_bytes); - test_error(errcode, "Could not get semaphore info"); - std::vector semaphore_properties( - properties_size_bytes / sizeof(cl_semaphore_properties_khr)); - errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR, - properties_size_bytes, - semaphore_properties.data(), nullptr); - test_error(errcode, "Could not get semaphore info"); - - for (unsigned i = 0; i < semaphore_properties.size() - 1; i++) - { - if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR - && semaphore_properties[i + 1] - == reinterpret_cast( - semaphore.getHandle())) - { - return TEST_PASS; - } - } - log_error( - "Failed to find the dx fence handle type in the semaphore properties"); - return TEST_FAIL; + return MakeAndRunTest(device, context, queue, + num_elements); } \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt index 682ada5f..6f4ac812 100644 --- a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt @@ -9,6 +9,7 @@ set(${MODULE_NAME}_SOURCES test_semaphores_negative_create.cpp test_semaphores_cross_queue.cpp test_semaphores_queries.cpp + test_semaphores_payload.cpp semaphore_base.h ) diff --git a/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h b/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h index 372bdd1e..828a5e46 100644 --- a/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h +++ b/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h @@ -23,6 +23,7 @@ #include "harness/deviceInfo.h" #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/extensionHelpers.h" struct SemaphoreBase { @@ -37,27 +38,15 @@ struct SemaphoreBase test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); // If it is supported get the addresses of all the APIs here. - // clang-format off -#define GET_EXTENSION_ADDRESS(FUNC) \ - FUNC = reinterpret_cast( \ - clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ - if (FUNC == nullptr) \ - { \ - log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ - " with " #FUNC "\n"); \ - return TEST_FAIL; \ - } - // clang-format on + GET_FUNCTION_EXTENSION_ADDRESS(device, + clCreateSemaphoreWithPropertiesKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueSignalSemaphoresKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueWaitSemaphoresKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clReleaseSemaphoreKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreInfoKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clRetainSemaphoreKHR); + GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreHandleForTypeKHR); - GET_EXTENSION_ADDRESS(clCreateSemaphoreWithPropertiesKHR); - GET_EXTENSION_ADDRESS(clEnqueueSignalSemaphoresKHR); - GET_EXTENSION_ADDRESS(clEnqueueWaitSemaphoresKHR); - GET_EXTENSION_ADDRESS(clReleaseSemaphoreKHR); - GET_EXTENSION_ADDRESS(clGetSemaphoreInfoKHR); - GET_EXTENSION_ADDRESS(clRetainSemaphoreKHR); - GET_EXTENSION_ADDRESS(clGetSemaphoreHandleForTypeKHR); - -#undef GET_EXTENSION_ADDRESS return CL_SUCCESS; } diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp index ce146b41..f4eaf5d1 100644 --- a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp +++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp @@ -14,14 +14,8 @@ // limitations under the License. // -#include - #include "semaphore_base.h" -#include "semaphore_base.h" - -#define FLUSH_DELAY_S 5 - namespace { const char* source = "__kernel void empty() {}"; diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores_payload.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_payload.cpp new file mode 100644 index 00000000..94fb82e0 --- /dev/null +++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_payload.cpp @@ -0,0 +1,86 @@ +// +// Copyright (c) 2024 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "semaphore_base.h" + +namespace { + +struct PayloadSemaphore : public SemaphoreTestBase +{ + clSemaphoreWrapper sema_sec; + + PayloadSemaphore(cl_device_id device, cl_context context, + cl_command_queue queue, cl_int nelems) + : SemaphoreTestBase(device, context, queue, nelems), sema_sec(this) + {} + + cl_int Run() override + { + cl_int err = CL_SUCCESS; + + // Create semaphore + cl_semaphore_properties_khr sema_props[] = { + static_cast(CL_SEMAPHORE_TYPE_KHR), + static_cast( + CL_SEMAPHORE_TYPE_BINARY_KHR), + 0 + }; + semaphore = + clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); + test_error(err, "Could not create semaphore"); + + sema_sec = + clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); + test_error(err, "Could not create semaphore"); + + { + cl_semaphore_payload_khr payload_list[] = { 1, 2 }; + cl_semaphore_khr semaphores[2] = { semaphore, sema_sec }; + + // Signal semaphore + err = clEnqueueSignalSemaphoresKHR( + queue, 2, semaphores, payload_list, 0, nullptr, nullptr); + test_error(err, "Could not signal semaphore"); + } + + { + cl_semaphore_payload_khr payload_list[] = { 3, 4 }; + cl_semaphore_khr semaphores[2] = { semaphore, sema_sec }; + + // Wait semaphore + err = clEnqueueWaitSemaphoresKHR(queue, 2, semaphores, payload_list, + 0, nullptr, nullptr); + test_error(err, "Could not wait semaphore"); + } + + // Finish + err = clFinish(queue); + test_error(err, "Could not finish queue"); + + return CL_SUCCESS; + } +}; + + +} // anonymous namespace + +// Confirm that a valid semaphore payload values list will be ignored if no +// semaphores in the list of sema_objects require a payload +REGISTER_TEST_VERSION(semaphores_payload, Version(1, 2)) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp index 6f1f51e7..77c6c2eb 100644 --- a/test_conformance/gl/test_images_write_common.cpp +++ b/test_conformance/gl/test_images_write_common.cpp @@ -336,9 +336,8 @@ int test_cl_image_write(cl_context context, cl_command_queue queue, get_explicit_type_name(*outType), suffix, convert); programPtr = kernelSource; - if (create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, (const char **)&programPtr, - "sample_test", "")) + if (create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programPtr, "sample_test")) { return -1; } diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp index 8b82b9f9..e1fa5059 100644 --- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp @@ -629,7 +629,7 @@ int image_from_small_buffer_negative(cl_device_id device, cl_context context, clCreateImage(context, flag, &format, &image_desc, nullptr, &err); - test_failure_error(err, CL_INVALID_MEM_OBJECT, + test_failure_error(err, CL_INVALID_IMAGE_SIZE, "Unexpected clCreateImage return"); err = clReleaseMemObject(buffer); diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index a66e6f7e..183edc74 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -859,7 +859,9 @@ double reference_add(double x, double y) __m128 vb = _mm_set_ss((float)b); va = _mm_add_ss(va, vb); _mm_store_ss((float *)&a, va); -#elif defined(__PPC__) +#elif defined(__PPC__) || defined(__riscv) + // RISC-V CPUs with default 'f' fp32 extension do not support any way to + // enable/disable FTZ mode, subnormals are always handled without flushing. // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes // denorm's to zero. As such, the reference add with FTZ must be emulated in // sw. @@ -876,7 +878,7 @@ double reference_add(double x, double y) } ub; ub.d = b; cl_uint mantA, mantB; - cl_ulong addendA, addendB, sum; + cl_ulong addendA, addendB; int expA = extractf(a, &mantA); int expB = extractf(b, &mantB); cl_uint signA = ua.u & 0x80000000U; @@ -972,7 +974,7 @@ double reference_multiply(double x, double y) __m128 vb = _mm_set_ss((float)b); va = _mm_mul_ss(va, vb); _mm_store_ss((float *)&a, va); -#elif defined(__PPC__) +#elif defined(__PPC__) || defined(__riscv) // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes // denorm's to zero. As such, the reference multiply with FTZ must be // emulated in sw. @@ -3351,7 +3353,7 @@ long double reference_cbrtl(long double x) long double reference_rintl(long double x) { -#if defined(__PPC__) +#if defined(__PPC__) || defined(__riscv) // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined // mantissa can represent more than LDBL_MANT_DIG binary digits. x = rintl(x); diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp index 6106e0ab..78118697 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp @@ -557,8 +557,9 @@ int TestNonUniformWorkGroup::prepareDevice () { if (_testRange & Range::BARRIERS) buildOptions += " -D TESTBARRIERS"; - err = create_single_kernel_helper_with_build_options (_context, &_program, &_testKernel, 1, - &KERNEL_FUNCTION, "testKernel", buildOptions.c_str()); + err = create_single_kernel_helper(_context, &_program, &_testKernel, 1, + &KERNEL_FUNCTION, "testKernel", + buildOptions.c_str()); if (err) { log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__); @@ -842,8 +843,9 @@ int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) { if (testRange & Range::BARRIERS) buildOptions += " -D TESTBARRIERS"; - err = create_single_kernel_helper_with_build_options (_context, &program, &testKernel, 1, - &KERNEL_FUNCTION, "testKernel", buildOptions.c_str()); + err = create_single_kernel_helper(_context, &program, &testKernel, 1, + &KERNEL_FUNCTION, "testKernel", + buildOptions.c_str()); if (err) { log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__); diff --git a/test_conformance/pipes/test_pipe_info.cpp b/test_conformance/pipes/test_pipe_info.cpp index 5525a554..4e1c83d9 100644 --- a/test_conformance/pipes/test_pipe_info.cpp +++ b/test_conformance/pipes/test_pipe_info.cpp @@ -63,9 +63,9 @@ REGISTER_TEST(pipe_info) log_info( " CL_PIPE_MAX_PACKETS passed.\n" ); } - err = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &pipe_kernel_code, "pipe_kernel", - "-cl-std=CL2.0 -cl-kernel-arg-info"); + err = create_single_kernel_helper(context, &program, &kernel, 1, + &pipe_kernel_code, "pipe_kernel", + "-cl-std=CL2.0 -cl-kernel-arg-info"); test_error_fail(err, "Error creating program"); cl_kernel_arg_type_qualifier arg_type_qualifier = 0; diff --git a/test_conformance/printf/test_printf.h b/test_conformance/printf/test_printf.h index 993a6126..519b9dd9 100644 --- a/test_conformance/printf/test_printf.h +++ b/test_conformance/printf/test_printf.h @@ -70,14 +70,15 @@ struct printDataGenParameters { std::vector genericFormats; const char* dataRepresentation; - const char* vectorFormatFlag; - const char* vectorFormatSpecifier; - const char* dataType; - const char* vectorSize; - const char* addrSpaceArgumentTypeQualifier; - const char* addrSpaceVariableTypeQualifier; - const char* addrSpaceParameter; - const char* addrSpacePAdd; + const char* vectorFormatFlag = nullptr; + const char* vectorFormatSpecifier = nullptr; + const char* dataType = nullptr; + const char* vectorSize = nullptr; + const char* addrSpaceArgumentTypeQualifier = nullptr; + const char* addrSpaceVariableTypeQualifier = nullptr; + const char* addrSpaceParameter = nullptr; + const char* addrSpacePAdd = nullptr; + bool allowFallbackTest = false; }; // Reference results - filled out at run-time @@ -111,6 +112,9 @@ struct testCase char*, const size_t); //function pointer for generating reference results Type dataType; //the data type that will be printed during reference result generation (used for setting rounding mode) + bool (*fallbackTestFN)(const char*, + const char*) = + nullptr; // function pointer to perform fallback test if required }; extern const char* strType[]; diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp index 83a21fbb..39b877ff 100644 --- a/test_conformance/printf/util_printf.cpp +++ b/test_conformance/printf/util_printf.cpp @@ -26,8 +26,11 @@ static void intRefBuilder(printDataGenParameters&, char*, const size_t); static void halfRefBuilder(printDataGenParameters&, char* rResult, const size_t); static void floatRefBuilder(printDataGenParameters&, char* rResult, const size_t); +static bool floatRefTest(const char* refResult, const char* analysisBuffer); static void doubleRefBuilder(printDataGenParameters&, char* rResult, const size_t); +static bool doubleRefTest(const char* refResult, const char* analysisBuffer); + static void octalRefBuilder(printDataGenParameters&, char*, const size_t); static void unsignedRefBuilder(printDataGenParameters&, char*, const size_t); static void hexRefBuilder(printDataGenParameters&, char*, const size_t); @@ -468,12 +471,12 @@ std::vector printFloatGenParameters = { // Double argument representing floating-point,in [-]xh.hhhhpAd style - { { "%.6a" }, "0.1f" }, + { { "%.6a" }, "0.5f", 0, 0, 0, 0, 0, 0, 0, 0, true }, //(Minimum)Ten-wide,Double argument representing floating-point,in // xh.hhhhpAd style,default(right)-justified - { { "%10.2a" }, "9990.235f" }, + { { "%10.2a" }, "1.5f", 0, 0, 0, 0, 0, 0, 0, 0, true }, //(Minimum)Ten-wide,two positions after the decimal,with // a blank space inserted before the value, default(right)-justified @@ -502,8 +505,9 @@ testCase testCaseFloat = { floatRefBuilder, - kfloat + kfloat, + floatRefTest }; //============================================== @@ -673,12 +677,12 @@ std::vector printDoubleGenParameters = { // Double argument representing floating-point,in [-]xh.hhhhpAd style - { { "%.6a" }, "0.1" }, + { { "%.6a" }, "0.5", 0, 0, 0, 0, 0, 0, 0, 0, true }, //(Minimum)Ten-wide,Double argument representing floating-point,in // xh.hhhhpAd style,default(right)-justified - { { "%10.2a" }, "9990.235" }, + { { "%10.2a" }, "1.5", 0, 0, 0, 0, 0, 0, 0, 0, true }, }; //--------------------------------------------------------- @@ -697,8 +701,9 @@ testCase testCaseDouble = { doubleRefBuilder, - kdouble + kdouble, + doubleRefTest }; //============================================== @@ -1032,6 +1037,9 @@ testCase testCaseChar = { std::vector printStringGenParameters = { + // empty format, no data representation + { {""} }, + // empty format { {""}, "\"foo\"" }, @@ -1094,6 +1102,8 @@ std::vector correctBufferString = { "", + "", + " foo", "f", @@ -1752,7 +1762,15 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId return !std::regex_match(analysisBuffer, nanRegex); } - return strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str()); + size_t ret = + strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str()); + + if (ret != 0 && pTestCase->_genParameters[testId].allowFallbackTest + && pTestCase->fallbackTestFN) + if (pTestCase->fallbackTestFN( + analysisBuffer, pTestCase->_correctBuffer[testId].c_str())) + return 0; + return ret; } static void intRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize) @@ -1776,6 +1794,13 @@ static void floatRefBuilder(printDataGenParameters& params, char* refResult, con strtof(params.dataRepresentation, NULL)); } +static bool floatRefTest(const char* refResult, const char* analysisBuffer) +{ + float test = strtof(analysisBuffer, NULL); + float expected = strtof(refResult, NULL); + return test == expected; +} + static void doubleRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize) { @@ -1783,6 +1808,13 @@ static void doubleRefBuilder(printDataGenParameters& params, char* refResult, strtod(params.dataRepresentation, NULL)); } +static bool doubleRefTest(const char* refResult, const char* analysisBuffer) +{ + double test = strtod(analysisBuffer, NULL); + double expected = strtod(refResult, NULL); + return test == expected; +} + static void octalRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize) { const unsigned long int data = strtoul(params.dataRepresentation, NULL, 10); diff --git a/test_conformance/profiling/profiling_timebase.cpp b/test_conformance/profiling/profiling_timebase.cpp index f26a9d89..23afcbb8 100644 --- a/test_conformance/profiling/profiling_timebase.cpp +++ b/test_conformance/profiling/profiling_timebase.cpp @@ -18,7 +18,7 @@ const char *kernelCode = "__kernel void kernel_empty(){}"; -REGISTER_TEST(profiling_timebase) +REGISTER_TEST_VERSION(profiling_timebase, Version(2, 1)) { Version version = get_device_cl_version(device); cl_platform_id platform = getPlatformFromDevice(device); diff --git a/test_conformance/vectors/structs.cpp b/test_conformance/vectors/structs.cpp index c0757017..48a6473b 100644 --- a/test_conformance/vectors/structs.cpp +++ b/test_conformance/vectors/structs.cpp @@ -341,9 +341,9 @@ int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState, { if ((targetArr[i]) % minAlign != (cl_uint)0) { - vlog_error( - "Error %zu (of %zu). Expected a multiple of %zx, got %x\n", i, - pClState->m_numThreads, minAlign, targetArr[i]); + vlog_error("Error in work-item %zu (of %zu). Expected a multiple " + "of 0x%zx, got 0x%x\n", + i, pClState->m_numThreads, minAlign, targetArr[i]); return -1; } } @@ -371,8 +371,9 @@ int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState, { if (targetArr[i] != targetSize) { - vlog_error("Error %zu (of %zu). Expected %d, got %d\n", i, - pClState->m_numThreads, targetSize, targetArr[i]); + vlog_error( + "Error in work-item %zu (of %zu). Expected %d, got %d\n", i, + pClState->m_numThreads, targetSize, targetArr[i]); return -1; } } @@ -390,10 +391,11 @@ int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState, { if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0) { - vlog_error( - "Error %zu (of %zu). Expected %zu more than a multiple of " - "%zu, got %d \n", - i, pClState->m_numThreads, beforeSize, totSize, targetArr[i]); + vlog_error("Error in work-item %zu (of %zu). Expected %zu more " + "than a multiple of " + "%zu, got %d \n", + i, pClState->m_numThreads, beforeSize, totSize, + targetArr[i]); return -1; } } diff --git a/test_conformance/vulkan/shaders/buffer.comp b/test_conformance/vulkan/shaders/buffer.comp index 3e4eae55..3d059ce5 100644 --- a/test_conformance/vulkan/shaders/buffer.comp +++ b/test_conformance/vulkan/shaders/buffer.comp @@ -1,28 +1,28 @@ -#version 450 -#extension GL_ARB_separate_shader_objects : enable -#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable -#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable - -#define MAX_BUFFERS 5 - -layout(binding = 0) buffer Params -{ - uint32_t numBuffers; - uint32_t bufferSize; - uint32_t interBufferOffset; -}; -layout(binding = 1) buffer Buffer -{ - uint8_t ptr[]; -} bufferPtrList[MAX_BUFFERS]; -layout(local_size_x = 128) in; -void main() { - for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) { - uint32_t ptrIdx = gl_GlobalInvocationID.x; - uint32_t limit = bufferSize; - while (ptrIdx < limit) { - bufferPtrList[bufIdx].ptr[ptrIdx]++; - ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x); - } - } +#version 450 +#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable +#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable + +layout(constant_id = 0) const uint MAX_BUFFERS = 5; + +layout(binding = 0) buffer Params +{ + uint32_t numBuffers; + uint32_t bufferSize; + uint32_t interBufferOffset; +}; +layout(binding = 1) buffer Buffer +{ + uint8_t ptr[]; +} bufferPtrList[MAX_BUFFERS]; +layout(local_size_x = 128) in; +void main() { + for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) { + uint32_t ptrIdx = gl_GlobalInvocationID.x; + uint32_t limit = bufferSize; + while (ptrIdx < limit) { + bufferPtrList[bufIdx].ptr[ptrIdx]++; + ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x); + } + } } \ No newline at end of file diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp index bd19987b..0d09a748 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp @@ -26,6 +26,7 @@ #endif #include +#include #include #include #include @@ -79,9 +80,9 @@ struct ConsistencyExternalBufferTest : public VulkanTestBase VulkanBufferList vkBufferList(1, *vkDevice, bufferSize, vkExternalMemoryHandleType); - VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory( + std::unique_ptr vkDeviceMem(new VulkanDeviceMemory( *vkDevice, vkBufferList[0], memoryTypeList[0], - vkExternalMemoryHandleType); + vkExternalMemoryHandleType)); vkDeviceMem->bindBuffer(vkBufferList[0], 0); diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index e39641f1..9e9a7fcb 100644 --- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -128,12 +128,24 @@ int run_test_with_two_queue( vkDescriptorSetLayoutBindingList.addBinding( 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); vkDescriptorSetLayoutBindingList.addBinding( - 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); - VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, - vkBufferShaderModule); + + VkSpecializationMapEntry entry; + entry.constantID = 0; + entry.offset = 0; + entry.size = sizeof(uint32_t); + + VkSpecializationInfo spec; + spec.mapEntryCount = 1; + spec.pMapEntries = &entry; + spec.dataSize = sizeof(uint32_t); + spec.pData = &numBuffers; + + VulkanComputePipeline vkComputePipeline( + vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec); VulkanDescriptorPool vkDescriptorPool(vkDevice, vkDescriptorSetLayoutBindingList); @@ -461,12 +473,24 @@ int run_test_with_one_queue( vkDescriptorSetLayoutBindingList.addBinding( 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); vkDescriptorSetLayoutBindingList.addBinding( - 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); - VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, - vkBufferShaderModule); + + VkSpecializationMapEntry entry; + entry.constantID = 0; + entry.offset = 0; + entry.size = sizeof(uint32_t); + + VkSpecializationInfo spec; + spec.mapEntryCount = 1; + spec.pMapEntries = &entry; + spec.dataSize = sizeof(uint32_t); + spec.pData = &numBuffers; + + VulkanComputePipeline vkComputePipeline( + vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec); VulkanDescriptorPool vkDescriptorPool(vkDevice, vkDescriptorSetLayoutBindingList); @@ -764,12 +788,24 @@ int run_test_with_multi_import_same_ctx( vkDescriptorSetLayoutBindingList.addBinding( 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); vkDescriptorSetLayoutBindingList.addBinding( - 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); - VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, - vkBufferShaderModule); + + VkSpecializationMapEntry entry; + entry.constantID = 0; + entry.offset = 0; + entry.size = sizeof(uint32_t); + + VkSpecializationInfo spec; + spec.mapEntryCount = 1; + spec.pMapEntries = &entry; + spec.dataSize = sizeof(uint32_t); + spec.pData = &numBuffers; + + VulkanComputePipeline vkComputePipeline( + vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec); VulkanDescriptorPool vkDescriptorPool(vkDevice, vkDescriptorSetLayoutBindingList); @@ -1103,12 +1139,24 @@ int run_test_with_multi_import_diff_ctx( vkDescriptorSetLayoutBindingList.addBinding( 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); vkDescriptorSetLayoutBindingList.addBinding( - 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); - VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, - vkBufferShaderModule); + + VkSpecializationMapEntry entry; + entry.constantID = 0; + entry.offset = 0; + entry.size = sizeof(uint32_t); + + VkSpecializationInfo spec; + spec.mapEntryCount = 1; + spec.pMapEntries = &entry; + spec.dataSize = sizeof(uint32_t); + spec.pData = &numBuffers; + + VulkanComputePipeline vkComputePipeline( + vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec); VulkanDescriptorPool vkDescriptorPool(vkDevice, vkDescriptorSetLayoutBindingList); @@ -1586,7 +1634,7 @@ struct BufferTestBase : public VulkanTestBase { BufferTestBase(cl_device_id device, cl_context context, cl_command_queue queue, cl_int nelems) - : VulkanTestBase(device, context, queue, nelems) + : VulkanTestBase(device, context, queue, nelems, true) {} int test_buffer_common(bool use_fence) diff --git a/test_conformance/vulkan/vulkan_test_base.h b/test_conformance/vulkan/vulkan_test_base.h index 8c7b07cc..1e3f8e2f 100644 --- a/test_conformance/vulkan/vulkan_test_base.h +++ b/test_conformance/vulkan/vulkan_test_base.h @@ -37,11 +37,13 @@ inline void params_reset() struct VulkanTestBase { VulkanTestBase(cl_device_id device, cl_context context, - cl_command_queue queue, cl_int nelems) + cl_command_queue queue, cl_int nelems, + bool useShaderInt8 = false) : device(device), context(context), num_elems(nelems) { vkDevice.reset(new VulkanDevice( - getAssociatedVulkanPhysicalDevice(device, useValidationLayers))); + getAssociatedVulkanPhysicalDevice(device, useValidationLayers), + getDefaultVulkanQueueFamilyToQueueCountMap(), useShaderInt8)); cl_platform_id platform; cl_int error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,