diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 2faac8b5..32884842 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -10,7 +10,7 @@ jobs: matrix: build-type: [Release] gl: [0] - os: [ubuntu-22.04, macos-latest, windows-latest] + os: [ubuntu-22.04, macos-latest, windows-latest, windows-11-arm] include: - os: ubuntu-22.04 gl: 1 @@ -28,7 +28,7 @@ jobs: arch: android-aarch64 android_arch_abi: arm64-v8a steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Setup Ninja uses: seanmiddleditch/gha-setup-ninja@master - name: Install Arm and AArch64 compilers @@ -62,6 +62,10 @@ jobs: git checkout cl_khr_unified_svm ln -s CL OpenCL # For OSX builds cd .. + - name: Fetch SPIR-V Headers + shell: bash + run: | + git clone https://github.com/KhronosGroup/SPIRV-Headers.git - name: Install Vulkan SDK uses: humbletim/install-vulkan-sdk@main with: @@ -70,12 +74,13 @@ jobs: - name: Install Android NDK if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }} run: | - wget https://dl.google.com/android/repository/android-ndk-r27c-linux.zip -O android-ndk.zip + wget https://dl.google.com/android/repository/android-ndk-r28c-linux.zip -O android-ndk.zip unzip android-ndk.zip -d $HOME - export ANDROID_NDK=$HOME/android-ndk-r27c + export ANDROID_NDK=$HOME/android-ndk-r28c echo "ANDROID_NDK=$ANDROID_NDK" >> $GITHUB_ENV export ANDROID_ARCH_ABI=${{ matrix.android_arch_abi }} echo "ANDROID_ARCH_ABI=$ANDROID_ARCH_ABI" >> $GITHUB_ENV + echo "ANDROID_PLATFORM=29" >> $GITHUB_ENV - name: Prepare CMake Toolchain file shell: bash run: | @@ -108,7 +113,7 @@ jobs: if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }} shell: bash run: | - echo "CMAKE_CONFIG_ARGS_ANDROID=-DCMAKE_ANDROID_ARCH_ABI=${ANDROID_ARCH_ABI}" >> $GITHUB_ENV + echo "CMAKE_ADDITIONAL_CONFIG_ARGS=-DCMAKE_ANDROID_ARCH_ABI=${ANDROID_ARCH_ABI} -DANDROID_PLATFORM=${ANDROID_PLATFORM}" >> $GITHUB_ENV - name: 
Fetch and build OpenCL ICD Loader shell: bash run: | @@ -120,7 +125,7 @@ jobs: -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ -DOPENCL_ICD_LOADER_HEADERS_DIR='${{ github.workspace }}'/OpenCL-Headers/ \ - "${CMAKE_CONFIG_ARGS_ANDROID}" + ${CMAKE_ADDITIONAL_CONFIG_ARGS} cmake --build . --parallel - name: Fetch Vulkan Headers shell: bash @@ -150,6 +155,7 @@ jobs: cd build if [[ ${RUNNER_OS} == "Windows" ]]; then CMAKE_OPENCL_LIBRARIES_OPTION="OpenCL" + CMAKE_ADDITIONAL_CONFIG_ARGS="-DD3D10_IS_SUPPORTED=ON -DD3D11_IS_SUPPORTED=ON" else CMAKE_OPENCL_LIBRARIES_OPTION="-lOpenCL" if [[ '${{ matrix.arch }}' != android-* ]]; then @@ -158,8 +164,10 @@ jobs: fi cmake .. -G Ninja \ -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ - -DCMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache" \ + -DCMAKE_C_COMPILER_LAUNCHER=sccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ -DCL_INCLUDE_DIR='${{ github.workspace }}'/OpenCL-Headers \ + -DSPIRV_INCLUDE_DIR='${{ github.workspace }}'/SPIRV-Headers \ -DCL_LIB_DIR='${{ github.workspace }}'/OpenCL-ICD-Loader/build \ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \ @@ -169,7 +177,7 @@ jobs: -DVULKAN_IS_SUPPORTED=ON \ -DVULKAN_INCLUDE_DIR='${{ github.workspace }}'/Vulkan-Headers/include/ \ -DVULKAN_LIB_DIR='${{ github.workspace }}'/Vulkan-Loader/build/loader/ \ - "${CMAKE_CONFIG_ARGS_ANDROID}" + ${CMAKE_ADDITIONAL_CONFIG_ARGS} cmake --build . 
--parallel formatcheck: name: Check code format @@ -177,7 +185,7 @@ jobs: steps: - name: Install packages run: sudo apt install -y clang-format clang-format-14 - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 - name: Check code format diff --git a/CMakeLists.txt b/CMakeLists.txt index f34ade8e..90c343fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,6 +59,12 @@ else(CL_INCLUDE_DIR AND CL_LIB_DIR) message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR and -DCL_LIB_DIR") endif(CL_INCLUDE_DIR AND CL_LIB_DIR) +# SPIRV_INCLUDE_DIR - path to dir with SPIR-V headers +if(NOT SPIRV_INCLUDE_DIR) + message(STATUS "SPIR-V headers haven't been found!") + message(FATAL_ERROR "Pass -DSPIRV_INCLUDE_DIR") +endif(NOT SPIRV_INCLUDE_DIR) + # CLConform_GL_LIBRARIES_DIR - path to OpenGL libraries if(GL_IS_SUPPORTED AND CLConform_GL_LIBRARIES_DIR) link_directories(${CLConform_GL_LIBRARIES_DIR}) @@ -195,6 +201,7 @@ if(APPLE) endif(APPLE) include_directories(SYSTEM ${CL_INCLUDE_DIR}) +include_directories(SYSTEM ${SPIRV_INCLUDE_DIR}/include) include_directories(${CLConform_SOURCE_DIR}/test_common/harness ${CLConform_SOURCE_DIR}/test_common/gles ${CLConform_SOURCE_DIR}/test_common/gl diff --git a/README.md b/README.md index 0cc09b1b..77e4d9a9 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,8 @@ Compiling the CTS requires the following CMake configuration options to be set: * `CL_INCLUDE_DIR` Points to the unified [OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers). +* `SPIRV_INCLUDE_DIR` Points to the unified + [SPIRV-Headers](https://github.com/KhronosGroup/SPIRV-Headers). * `CL_LIB_DIR` Directory containing the OpenCL library to build against. * `SPIRV_TOOLS_DIR` Directory containing the `spirv-as` and `spirv-val` binaries to be used in the CTS build process. Alternatively, the location to these binaries @@ -31,6 +33,7 @@ a build, and compile. 
```sh git clone https://github.com/KhronosGroup/OpenCL-CTS.git git clone https://github.com/KhronosGroup/OpenCL-Headers.git +git clone https://github.com/KhronosGroup/SPIRV-Headers.git git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git git clone https://github.com/KhronosGroup/SPIRV-Tools.git git clone https://github.com/KhronosGroup/SPIRV-Headers.git SPIRV-Tools/external/spirv-headers @@ -50,6 +53,7 @@ cmake --build SPIRV-Tools/build --config Release mkdir OpenCL-CTS/build cmake -S OpenCL-CTS -B OpenCL-CTS/build \ -DCL_INCLUDE_DIR=$PWD/OpenCL-Headers \ + -DSPIRV_INCLUDE_DIR=$PWD/SPIRV-Headers \ -DCL_LIB_DIR=$PWD/OpenCL-ICD-Loader/build \ -DSPIRV_TOOLS_DIR=$PWD/SPIRV-Tools/build/tools/ \ -DOPENCL_LIBRARIES=OpenCL diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp index fb1291d6..fab778c8 100644 --- a/test_common/harness/ThreadPool.cpp +++ b/test_common/harness/ThreadPool.cpp @@ -436,7 +436,14 @@ void *ThreadPool_WorkerFunc(void *p) // drop run count to 0 gRunCount = 0; +#if defined(_M_IX86) || defined(_M_X64) _mm_mfence(); +#elif defined(_M_ARM64) + __dmb(_ARM64_BARRIER_ISHST); +#else +#error Architecture needs an implementation +#endif + #else if (pthread_mutex_lock(&gAtomicLock)) log_error( @@ -703,7 +710,13 @@ void ThreadPool_Exit(void) // http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins __sync_synchronize(); #elif defined(_MSC_VER) +#if defined(_M_IX86) || defined(_M_X64) _mm_mfence(); +#elif defined(_M_ARM64) + __dmb(_ARM64_BARRIER_ISHST); +#else +#error Architecture needs an implementation +#endif #else #warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed #endif diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp index 18c2869d..e0e326ff 100644 --- a/test_common/harness/conversions.cpp +++ b/test_common/harness/conversions.cpp @@ -23,10 +23,10 @@ #include -#if 
defined(__SSE__) || defined(_MSC_VER) +#if defined(__SSE__) || _M_IX86_FP == 1 #include #endif -#if defined(__SSE2__) || defined(_MSC_VER) +#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64) #include #endif @@ -110,7 +110,7 @@ static long lrintf_clamped(float f) volatile float x = f; float magicVal = magic[f < 0]; -#if defined(__SSE__) || defined(_WIN32) +#if defined(__SSE__) || _M_IX86_FP == 1 // Defeat x87 based arithmetic, which cant do FTZ, and will round this // incorrectly __m128 v = _mm_set_ss(x); @@ -150,7 +150,7 @@ static long lrint_clamped(double f) { volatile double x = f; double magicVal = magic[f < 0]; -#if defined(__SSE2__) || (defined(_MSC_VER)) +#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64) // Defeat x87 based arithmetic, which cant do FTZ, and will round this // incorrectly __m128d v = _mm_set_sd(x); diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp index fe65f0cc..b367555a 100644 --- a/test_common/harness/errorHelpers.cpp +++ b/test_common/harness/errorHelpers.cpp @@ -387,8 +387,7 @@ static float Ulp_Error_Half_Float(float test, double reference) } // reference is a normal power of two or a zero - int ulp_exp = - HALF_MANT_DIG - 1 - std::max(ilogb(reference) - 1, HALF_MIN_EXP - 1); + int ulp_exp = HALF_MANT_DIG - std::max(ilogb(reference), HALF_MIN_EXP); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -469,8 +468,7 @@ float Ulp_Error(float test, double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place - int ulp_exp = - FLT_MANT_DIG - 1 - std::max(ilogb(reference) - 1, FLT_MIN_EXP - 1); + int ulp_exp = FLT_MANT_DIG - std::max(ilogb(reference), FLT_MIN_EXP); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -553,8 +551,7 @@ float Ulp_Error_Double(double test, long double reference) // reference is a normal power of two or a zero // The unbiased exponent 
of the ulp unit place - int ulp_exp = - DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1); + int ulp_exp = DBL_MANT_DIG - std::max(ilogbl(reference), DBL_MIN_EXP); // Scale the exponent of the error float result = (float)scalbnl(testVal - reference, ulp_exp); diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index 12aba0a9..afb0f5a3 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -37,36 +37,44 @@ typedef int FPU_mode_type; #else typedef int64_t FPU_mode_type; #endif -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ - || defined(__MINGW32__) +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) || defined(__MINGW32__) #include +#elif defined(_M_ARM64) +#include #elif defined(__PPC__) #include extern __thread fpu_control_t fpu_control; #elif defined(__mips__) #include "mips/m32c1.h" #endif + // Set the reference hardware floating point unit to FTZ mode -inline void ForceFTZ(FPU_mode_type *mode) +inline void ForceFTZ(FPU_mode_type *oldMode) { -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ - || defined(__MINGW32__) - *mode = _mm_getcsr(); - _mm_setcsr(*mode | 0x8040); +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) || defined(__MINGW32__) + *oldMode = _mm_getcsr(); + _mm_setcsr(*oldMode | 0x8040); #elif defined(__PPC__) - *mode = fpu_control; + *oldMode = fpu_control; fpu_control |= _FPU_MASK_NI; #elif defined(__arm__) unsigned fpscr; __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr)); - *mode = fpscr; + *oldMode = fpscr; __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24))); // Add 64 bit support -#elif defined(__aarch64__) +#elif defined(__aarch64__) // Clang uint64_t fpscr; __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); - *mode = fpscr; + *oldMode = fpscr; __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24))); +#elif defined(_M_ARM64) // Visual Studio + 
uint64_t fpscr; + fpscr = _ReadStatusReg(ARM64_FPSR); + *oldMode = fpscr; + _WriteStatusReg(ARM64_FPCR, fpscr | (1U << 24)); #elif defined(__mips__) fpa_bissr(FPA_CSR_FS); #else @@ -75,26 +83,31 @@ inline void ForceFTZ(FPU_mode_type *mode) } // Disable the denorm flush to zero -inline void DisableFTZ(FPU_mode_type *mode) +inline void DisableFTZ(FPU_mode_type *oldMode) { -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ - || defined(__MINGW32__) - *mode = _mm_getcsr(); - _mm_setcsr(*mode & ~0x8040); +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) || defined(__MINGW32__) + *oldMode = _mm_getcsr(); + _mm_setcsr(*oldMode & ~0x8040); #elif defined(__PPC__) *mode = fpu_control; fpu_control &= ~_FPU_MASK_NI; #elif defined(__arm__) unsigned fpscr; __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr)); - *mode = fpscr; + *oldMode = fpscr; __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24))); // Add 64 bit support -#elif defined(__aarch64__) +#elif defined(__aarch64__) // Clang uint64_t fpscr; __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); - *mode = fpscr; + *oldMode = fpscr; __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24))); +#elif defined(_M_ARM64) // Visual Studio + uint64_t fpscr; + fpscr = _ReadStatusReg(ARM64_FPSR); + *oldMode = fpscr; + _WriteStatusReg(ARM64_FPCR, fpscr & ~(1U << 24)); #elif defined(__mips__) fpa_bicsr(FPA_CSR_FS); #else @@ -105,16 +118,18 @@ inline void DisableFTZ(FPU_mode_type *mode) // Restore the reference hardware to floating point state indicated by *mode inline void RestoreFPState(FPU_mode_type *mode) { -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ - || defined(__MINGW32__) +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) || defined(__MINGW32__) _mm_setcsr(*mode); #elif defined(__PPC__) fpu_control = *mode; #elif defined(__arm__) __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode)); // Add 64 bit support -#elif 
defined(__aarch64__) +#elif defined(__aarch64__) // Clang __asm__ volatile("msr fpcr, %0" ::"r"(*mode)); +#elif defined(_M_ARM64) // Visual Studio + _WriteStatusReg(ARM64_FPCR, *mode); #elif defined(__mips__) // Mips runs by default with DAZ=1 FTZ=1 #else @@ -125,4 +140,4 @@ inline void RestoreFPState(FPU_mode_type *mode) #error ForceFTZ and RestoreFPState need implentations #endif -#endif +#endif \ No newline at end of file diff --git a/test_common/harness/mathHelpers.h b/test_common/harness/mathHelpers.h new file mode 100644 index 00000000..cdbf2bfe --- /dev/null +++ b/test_common/harness/mathHelpers.h @@ -0,0 +1,35 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _mathHelpers_h +#define _mathHelpers_h + +#if defined(__APPLE__) +#include +#else +#include +#endif +#include + +template inline bool isnan_fp(const T &v) { return std::isnan(v); } + +template <> inline bool isnan_fp(const cl_half &v) +{ + uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = ((cl_half)v) & 0x3FF; + return (h_exp == 0x1F && h_mant != 0); +} + +#endif // _mathHelpers_h diff --git a/test_common/harness/msvc9.c b/test_common/harness/msvc9.c index ef70035f..c0042928 100644 --- a/test_common/harness/msvc9.c +++ b/test_common/harness/msvc9.c @@ -786,7 +786,9 @@ int __builtin_clz(unsigned int pattern) #endif // !__has_builtin(__builtin_clz) #include +#if !defined(_M_ARM64) #include +#endif int usleep(int usec) { diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp index 29499381..65167116 100644 --- a/test_common/harness/parseParameters.cpp +++ b/test_common/harness/parseParameters.cpp @@ -35,6 +35,8 @@ std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM; bool gDisableSPIRVValidation = false; std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR; unsigned gNumWorkerThreads; +bool gListTests = false; +bool gWimpyMode = false; void helpInfo() { @@ -49,6 +51,12 @@ void helpInfo() spir-v Use SPIR-V offline compilation --num-worker-threads Select parallel execution with the specified number of worker threads. + --list + List sub-tests + -w, --wimpy + Enable wimpy mode. It does not impact all tests. Impacted tests will run + with a very small subset of the tests. This option should not be used + for conformance submission (default: disabled). For offline compilation (binary and spir-v modes) only: --compilation-cache-mode @@ -104,6 +112,16 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore) // option and print its own help. 
helpInfo(); } + else if (!strcmp(argv[i], "--list") || !strcmp(argv[i], "-list")) + { + delArg++; + gListTests = true; + } + else if (!strcmp(argv[i], "--wimpy") || !strcmp(argv[i], "-w")) + { + delArg++; + gWimpyMode = true; + } else if (!strcmp(argv[i], "--compilation-mode")) { delArg++; diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h index 437e12f9..ef8a7cb6 100644 --- a/test_common/harness/parseParameters.h +++ b/test_common/harness/parseParameters.h @@ -40,6 +40,8 @@ extern std::string gCompilationCachePath; extern std::string gCompilationProgram; extern bool gDisableSPIRVValidation; extern std::string gSPIRVValidator; +extern bool gListTests; +extern bool gWimpyMode; extern int parseCustomParam(int argc, const char *argv[], const char *ignore = 0); diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp index b2e443b7..5aeb86f1 100644 --- a/test_common/harness/rounding_mode.cpp +++ b/test_common/harness/rounding_mode.cpp @@ -193,7 +193,8 @@ RoundingMode get_round(void) // basic_test_conversions.c in which case, these function are at // liberty to do nothing. 
// -#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32) +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) #include #elif defined(__PPC__) #include @@ -203,18 +204,24 @@ RoundingMode get_round(void) void *FlushToZero(void) { #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) union { unsigned int i; void *p; } u = { _mm_getcsr() }; _mm_setcsr(u.i | 0x8040); return u.p; -#elif defined(__arm__) || defined(__aarch64__) +#elif defined(__arm__) || defined(__aarch64__) // Clang int64_t fpscr; _FPU_GETCW(fpscr); _FPU_SETCW(fpscr | FPSCR_FZ); return NULL; +#elif defined(_M_ARM64) // Visual Studio + uint64_t fpscr; + fpscr = _ReadStatusReg(ARM64_FPSR); + _WriteStatusReg(ARM64_FPCR, fpscr | (1U << 24)); + return NULL; #elif defined(__PPC__) fpu_control_t flags = 0; _FPU_GETCW(flags); @@ -237,16 +244,21 @@ void *FlushToZero(void) void UnFlushToZero(void *p) { #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) -#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ + || defined(_M_X64) union { void *p; unsigned int i; } u = { p }; _mm_setcsr(u.i); -#elif defined(__arm__) || defined(__aarch64__) +#elif defined(__arm__) || defined(__aarch64__) // Clang int64_t fpscr; _FPU_GETCW(fpscr); _FPU_SETCW(fpscr & ~FPSCR_FZ); +#elif defined(_M_ARM64) // Visual Studio + uint64_t fpscr; + fpscr = _ReadStatusReg(ARM64_FPSR); + _WriteStatusReg(ARM64_FPCR, fpscr & ~(1U << 24)); #elif defined(__PPC__) fpu_control_t flags = 0; _FPU_GETCW(flags); diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index df54a35d..c745a639 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -22,6 +22,7 @@ #include #include #include 
+#include #include #include #include @@ -169,6 +170,19 @@ void version_expected_info(const char *test_name, const char *api_name, "reports %s version %s)\n", test_name, api_name, expected_version, api_name, device_version); } + +static void list_tests(int testNum, test_definition testList[]) +{ + std::set names; + for (int i = 0; i < testNum; i++) + { + names.insert(testList[i].name); + } + for (const auto &name : names) + { + log_info("\t%s\n", name.c_str()); + } +} int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, test_definition testList[], int forceNoContextCreation, @@ -197,8 +211,11 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, if (env_mode != NULL) { based_on_env_var = 1; - if (strcmp(env_mode, "gpu") == 0 - || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0) + if (strcmp(env_mode, "all") == 0 + || strcmp(env_mode, "CL_DEVICE_TYPE_ALL") == 0) + device_type = CL_DEVICE_TYPE_ALL; + else if (strcmp(env_mode, "gpu") == 0 + || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0) device_type = CL_DEVICE_TYPE_GPU; else if (strcmp(env_mode, "cpu") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0) @@ -255,10 +272,23 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, return EXIT_FAILURE; } - /* Special case: just list the tests */ - if ((argc > 1) - && (!strcmp(argv[1], "-list") || !strcmp(argv[1], "-h") - || !strcmp(argv[1], "--help"))) + if (gListTests) + { + list_tests(testNum, testList); + return EXIT_SUCCESS; + } + + gWimpyMode |= (getenv("CL_WIMPY_MODE") != nullptr); + if (gWimpyMode) + { + log_info("\n"); + log_info("**************************\n"); + log_info("*** Wimpy mode enabled ***\n"); + log_info("**************************\n"); + log_info("\n"); + } + + if ((argc > 1) && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) { char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME"); @@ -271,7 +301,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, "(default 
0).\n"); log_info("\tid\t\tIndicates device at index should be used " "(default 0).\n"); - log_info("\t\tcpu|gpu|accelerator| " + log_info("\t\tall|cpu|gpu|accelerator| " "(default CL_DEVICE_TYPE_DEFAULT)\n"); log_info("\n"); log_info("\tNOTE: You may pass environment variable " @@ -281,10 +311,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, log_info("\n"); log_info("Test names:\n"); - for (int i = 0; i < testNum; i++) - { - log_info("\t%s\n", testList[i].name); - } + list_tests(testNum, testList); return EXIT_SUCCESS; } @@ -320,8 +347,14 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, /* Do we have a CPU/GPU specification? */ if (argc > 1) { - if (strcmp(argv[argc - 1], "gpu") == 0 - || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0) + if (strcmp(argv[argc - 1], "all") == 0 + || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_ALL") == 0) + { + device_type = CL_DEVICE_TYPE_ALL; + argc--; + } + else if (strcmp(argv[argc - 1], "gpu") == 0 + || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0) { device_type = CL_DEVICE_TYPE_GPU; argc--; @@ -376,6 +409,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, switch (device_type) { + case CL_DEVICE_TYPE_ALL: log_info("Requesting any device "); break; case CL_DEVICE_TYPE_GPU: log_info("Requesting GPU device "); break; case CL_DEVICE_TYPE_CPU: log_info("Requesting CPU device "); break; case CL_DEVICE_TYPE_ACCELERATOR: diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h index 32ed18b4..cc9d8212 100644 --- a/test_common/harness/testHarness.h +++ b/test_common/harness/testHarness.h @@ -28,8 +28,8 @@ public: Version(): m_major(0), m_minor(0) {} Version(cl_uint major, cl_uint minor): m_major(major), m_minor(minor) {} - int major() const { return m_major; } - int minor() const { return m_minor; } + int get_major() const { return m_major; } + int get_minor() const { return m_minor; } bool operator>(const Version &rhs) const { 
return to_uint() > rhs.to_uint(); diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt index f2bfac35..b781e49b 100644 --- a/test_conformance/api/CMakeLists.txt +++ b/test_conformance/api/CMakeLists.txt @@ -1,5 +1,7 @@ set(MODULE_NAME API) +find_package(Python3 COMPONENTS Interpreter QUIET) + set(${MODULE_NAME}_SOURCES main.cpp negative_platform.cpp @@ -40,6 +42,20 @@ set(${MODULE_NAME}_SOURCES test_pipe_properties_queries.cpp test_wg_suggested_local_work_size.cpp test_device_command_queue.cpp + test_spirv_queries.cpp + ${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def ) +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def + COMMENT "Generating spirv_capability_deps.def..." + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_spirv_capability_deps.py + --grammar "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json" + --output "${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def" + DEPENDS generate_spirv_capability_deps.py "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json" + USES_TERMINAL + VERBATIM) + include(../CMakeCommon.txt) + +target_include_directories(${${MODULE_NAME}_OUT} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/test_conformance/api/generate_spirv_capability_deps.py b/test_conformance/api/generate_spirv_capability_deps.py new file mode 100644 index 00000000..ef59b6e8 --- /dev/null +++ b/test_conformance/api/generate_spirv_capability_deps.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 + +##################################################################### +# Copyright (c) 2025 The Khronos Group Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +##################################################################### + +""" +Generates a file describing the SPIR-V extension dependencies or SPIR-V version +dependencies for a SPIR-V capability. This can be used to ensure that if support +for a SPIR-V capability is reported, the necessary SPIR-V extensions or SPIR-V +version is also supported. +""" + +import argparse +import json + +header_text = """\ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file is generated from the SPIR-V JSON grammar file. +// Please do not edit it directly! 
+""" + +def main(): + parser = argparse.ArgumentParser(description='Generate SPIR-V extension and version dependencies for SPIR-V capabilities') + + parser.add_argument('--grammar', metavar='', + type=str, required=True, + help='input JSON grammar file') + parser.add_argument('--output', metavar='', + type=str, required=False, + help='output file path (default: stdout)') + args = parser.parse_args() + + dependencies = {} + capabilities = [] + with open(args.grammar) as json_file: + grammar_json = json.loads(json_file.read()) + for operand_kind in grammar_json['operand_kinds']: + if operand_kind['kind'] == 'Capability': + for cap in operand_kind['enumerants']: + capname = cap['enumerant'] + capabilities.append(capname) + dependencies[capname] = {} + dependencies[capname]['extensions'] = cap['extensions'] if 'extensions' in cap else [] + dependencies[capname]['version'] = ("SPIR-V_" + cap['version']) if 'version' in cap and cap['version'] != 'None' else "" + + capabilities.sort() + + output = [] + output.append(header_text) + output.append("// clang-format off") + if False: + for cap in capabilities: + deps = dependencies[cap] + extensions_str = ', '.join(f'"{ext}"' for ext in deps['extensions']) + + output.append('SPIRV_CAPABILITY_DEPENDENCIES( {}, {{{}}}, "{}" )'.format( + cap, extensions_str, deps['version'])) + else: + for cap in capabilities: + deps = dependencies[cap] + if deps['version'] != "": + output.append('SPIRV_CAPABILITY_VERSION_DEPENDENCY( {}, "{}" )'.format(cap, deps['version'])) + for ext in deps['extensions']: + output.append('SPIRV_CAPABILITY_EXTENSION_DEPENDENCY( {}, "{}" )'.format(cap, ext)) + output.append("// clang-format on") + + if args.output: + with open(args.output, 'w') as output_file: + output_file.write('\n'.join(output)) + else: + print('\n'.join(output)) + +if __name__ == '__main__': + main() diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index cd0934d0..29677623 100644 --- 
a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -1088,10 +1088,12 @@ REGISTER_TEST(min_max_image_buffer_size) pixelBytes = maxAllocSize / maxDimensionPixels; if (pixelBytes == 0) { - log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than " - "CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image " - "of maximum size!\n"); - return -1; + log_info( + "Note, the value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is %zu pixels, " + "therefore the size of the allocated image may be larger than the " + "scaled CL_DEVICE_MAX_MEM_ALLOC_SIZE of %" PRIu64 " bytes.\n", + maxDimensionPixels, maxAllocSize); + pixelBytes = 1; } error = -1; diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp index 30452caa..3c156d87 100644 --- a/test_conformance/api/test_kernels.cpp +++ b/test_conformance/api/test_kernels.cpp @@ -87,6 +87,35 @@ const char *sample_two_kernel_program[] = { "\n" "}\n" }; +const char *sample_mem_obj_size_test_kernel = R"( + __kernel void mem_obj_size_test(__global int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src[tid]; + } +)"; + +const char *sample_local_size_test_kernel = R"( + __kernel void local_size_test(__local int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src[tid]; + } +)"; + +const char *sample_read_only_image_test_kernel = R"( + __kernel void read_only_image_test(__write_only image2d_t img, __global uint4 *src) + { + write_imageui(img, (int2)(get_global_id(0), get_global_id(1)), src[0]); + } +)"; + +const char *sample_write_only_image_test_kernel = R"( + __kernel void write_only_image_test(__read_only image2d_t src, __global uint4 *dst) + { + dst[0]=read_imageui(src, (int2)(get_global_id(0), get_global_id(1))); + } +)"; REGISTER_TEST(get_kernel_info) { @@ -669,7 +698,7 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg) test_error(error, "Unable to get sample_image_test kernel for 
built program"); - std::vector mem_data(size_dim * size_dim); + std::vector mem_data(size_dim * size_dim * 4); buffer = clCreateBuffer(context, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR, sizeof(cl_int) * size_dim, mem_data.data(), &error); test_error(error, "clCreateBuffer failed"); @@ -704,3 +733,169 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg) return TEST_PASS; } + +REGISTER_TEST(negative_invalid_arg_mem_obj) +{ + cl_int error = CL_SUCCESS; + clProgramWrapper program; + clKernelWrapper mem_obj_arg_kernel; + + // Setup the test + error = + create_single_kernel_helper(context, &program, nullptr, 1, + &sample_mem_obj_size_test_kernel, nullptr); + test_error(error, "Unable to build test program"); + + mem_obj_arg_kernel = clCreateKernel(program, "mem_obj_size_test", &error); + test_error(error, + "Unable to get mem_obj_size_test kernel for built program"); + + std::vector mem_data(256, 0); + clMemWrapper buffer = clCreateBuffer( + context, CL_MEM_USE_HOST_PTR, mem_data.size(), mem_data.data(), &error); + test_error(error, "clCreateBuffer failed"); + + // Run the test - CL_INVALID_ARG_SIZE + error = clSetKernelArg(mem_obj_arg_kernel, 0, sizeof(cl_mem) * 2, &buffer); + test_failure_error_ret( + error, CL_INVALID_ARG_SIZE, + "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when " + "argument is a memory object and arg_size > sizeof(cl_mem)", + TEST_FAIL); + + error = clSetKernelArg(mem_obj_arg_kernel, 0, sizeof(cl_mem) / 2, &buffer); + test_failure_error_ret( + error, CL_INVALID_ARG_SIZE, + "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when " + "argument is a memory object and arg_size < sizeof(cl_mem)", + TEST_FAIL); + + return TEST_PASS; +} + +REGISTER_TEST(negative_invalid_kernel) +{ + cl_int error = CL_SUCCESS; + clKernelWrapper kernel; + + clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_ONLY, + sizeof(cl_float), NULL, &error); + test_error(error, "clCreateBuffer failed"); + + // Run the test - 
CL_INVALID_KERNEL + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem); + test_failure_error_ret( + error, CL_INVALID_KERNEL, + "clSetKernelArg is supposed to fail with CL_INVALID_KERNEL when kernel " + "is not a valid kernel object", + TEST_FAIL); + + return TEST_PASS; +} + +REGISTER_TEST(negative_invalid_arg_index) +{ + cl_int error = CL_SUCCESS; + clProgramWrapper program; + clKernelWrapper kernel; + + // Setup the test + error = create_single_kernel_helper(context, &program, nullptr, 1, + sample_single_test_kernel, nullptr); + test_error(error, "Unable to build test program"); + + kernel = clCreateKernel(program, "sample_test", &error); + test_error(error, "Unable to get sample_test kernel for built program"); + + // Run the test - index 2 is out of range - expected CL_INVALID_ARG_INDEX + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), nullptr); + test_failure_error_ret( + error, CL_INVALID_ARG_INDEX, + "clSetKernelArg is supposed to fail with CL_INVALID_ARG_INDEX when " + "arg_index is not a valid argument index", + TEST_FAIL); + + return TEST_PASS; +} + +REGISTER_TEST(negative_invalid_arg_size_local) +{ + cl_int error = CL_SUCCESS; + clProgramWrapper program; + clKernelWrapper local_arg_kernel; + + // Setup the test + error = create_single_kernel_helper( + context, &program, nullptr, 1, &sample_local_size_test_kernel, nullptr); + test_error(error, "Unable to build test program"); + + local_arg_kernel = clCreateKernel(program, "local_size_test", &error); + test_error(error, "Unable to get local_size_test kernel for built program"); + + // Run the test + error = clSetKernelArg(local_arg_kernel, 0, 0, nullptr); + test_failure_error_ret( + error, CL_INVALID_ARG_SIZE, + "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when 0 is " + "passed to a local qualifier kernel argument", + TEST_FAIL); + + return TEST_PASS; +} + +REGISTER_TEST(negative_set_read_write_image_arg) +{ + cl_int error = CL_SUCCESS; + clProgramWrapper program; + clKernelWrapper
write_image_kernel, read_image_kernel; + clMemWrapper write_only_image, read_only_image; + const char *test_kernels[2] = { sample_read_only_image_test_kernel, + sample_write_only_image_test_kernel }; + constexpr cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 }; + const int size_dim = 128; + + PASSIVE_REQUIRE_IMAGE_SUPPORT(device); + + // Setup the test + error = create_single_kernel_helper(context, &program, nullptr, 2, + test_kernels, nullptr); + test_error(error, "Unable to build test program"); + + read_image_kernel = clCreateKernel(program, "read_only_image_test", &error); + test_error(error, + "Unable to get read_only_image_test kernel for built program"); + + write_image_kernel = + clCreateKernel(program, "write_only_image_test", &error); + test_error(error, + "Unable to get write_only_image_test kernel for built program"); + + read_only_image = create_image_2d(context, CL_MEM_READ_ONLY, &format, + size_dim, size_dim, 0, nullptr, &error); + test_error(error, "create_image_2d failed"); + + write_only_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, + size_dim, size_dim, 0, nullptr, &error); + test_error(error, "create_image_2d failed"); + + // Run the test + error = clSetKernelArg(read_image_kernel, 0, sizeof(read_only_image), + &read_only_image); + test_failure_error_ret(error, CL_INVALID_ARG_VALUE, + "clSetKernelArg is supposed to fail " + "with CL_INVALID_ARG_VALUE when an image is " + "created with CL_MEM_READ_ONLY is " + "passed to a write_only kernel argument", + TEST_FAIL); + + error = clSetKernelArg(write_image_kernel, 0, sizeof(write_only_image), + &write_only_image); + test_failure_error_ret(error, CL_INVALID_ARG_VALUE, + "clSetKernelArg is supposed to fail " + "with CL_INVALID_ARG_VALUE when an image is " + "created with CL_MEM_WRITE_ONLY is " + "passed to a read_only kernel argument", + TEST_FAIL); + + return TEST_PASS; +} diff --git a/test_conformance/api/test_spirv_queries.cpp b/test_conformance/api/test_spirv_queries.cpp new 
file mode 100644 index 00000000..720f73b1 --- /dev/null +++ b/test_conformance/api/test_spirv_queries.cpp @@ -0,0 +1,767 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include +#include +#include + +#define SPV_ENABLE_UTILITY_CODE +#include + +static bool is_spirv_version_supported(cl_device_id deviceID, + const std::string& version) +{ + std::string ilVersions = get_device_il_version_string(deviceID); + return ilVersions.find(version) != std::string::npos; +} + +static int doQueries(cl_device_id device, + std::vector& extendedInstructionSets, + std::vector& extensions, + std::vector& capabilities) +{ + cl_int error = CL_SUCCESS; + + size_t size = 0; + error = + clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR, + 0, nullptr, &size); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR size\n"); + + extendedInstructionSets.resize(size / sizeof(const char*)); + error = + clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR, + size, extendedInstructionSets.data(), nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR\n"); + + error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, 0, nullptr, + &size); + test_error( + error, + "clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR size\n"); + + extensions.resize(size / 
sizeof(const char*)); + error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, size, + extensions.data(), nullptr); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR\n"); + + error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, 0, + nullptr, &size); + test_error( + error, + "clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR size\n"); + + capabilities.resize(size / sizeof(cl_uint)); + error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, size, + capabilities.data(), nullptr); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR\n"); + + return CL_SUCCESS; +} + +static int findRequirements(cl_device_id device, + std::vector& extendedInstructionSets, + std::vector& extensions, + std::vector& capabilities) +{ + cl_int error = CL_SUCCESS; + + auto version = get_device_cl_version(device); + auto ilVersions = get_device_il_version_string(device); + + // If no SPIR-V versions are supported, there are no requirements. 
+ if (ilVersions.find("SPIR-V") == std::string::npos) + { + return CL_SUCCESS; + } + + cl_bool deviceImageSupport = CL_FALSE; + cl_bool deviceReadWriteImageSupport = CL_FALSE; + cl_bool deviceSubGroupsSupport = CL_FALSE; + cl_bool deviceGenericAddressSpaceSupport = CL_FALSE; + cl_bool deviceWorkGroupCollectiveFunctionsSupport = CL_FALSE; + cl_bool devicePipeSupport = CL_FALSE; + cl_bool deviceDeviceEnqueueSupport = CL_FALSE; + cl_device_integer_dot_product_capabilities_khr + deviceIntegerDotProductCapabilities = 0; + cl_device_fp_atomic_capabilities_ext deviceFp32AtomicCapabilities = 0; + cl_device_fp_atomic_capabilities_ext deviceFp16AtomicCapabilities = 0; + cl_device_fp_atomic_capabilities_ext deviceFp64AtomicCapabilities = 0; + + error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, + sizeof(deviceImageSupport), &deviceImageSupport, + nullptr); + test_error(error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE_SUPPORT\n"); + + if (version >= Version(2, 0)) + { + cl_uint deviceMaxReadWriteImageArgs = 0; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, + sizeof(deviceMaxReadWriteImageArgs), + &deviceMaxReadWriteImageArgs, nullptr); + test_error( + error, + "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n"); + + deviceReadWriteImageSupport = + deviceMaxReadWriteImageArgs != 0 ? CL_TRUE : CL_FALSE; + } + + if (version >= Version(2, 1)) + { + cl_uint deviceMaxNumSubGroups = 0; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS, + sizeof(deviceMaxNumSubGroups), + &deviceMaxNumSubGroups, nullptr); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_MAX_NUM_SUB_GROUPS\n"); + + deviceSubGroupsSupport = + deviceMaxNumSubGroups != 0 ? 
CL_TRUE : CL_FALSE; + } + else if (is_extension_available(device, "cl_khr_subgroups")) + { + deviceSubGroupsSupport = CL_TRUE; + } + + if (version >= Version(3, 0)) + { + error = clGetDeviceInfo(device, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, + sizeof(deviceGenericAddressSpaceSupport), + &deviceGenericAddressSpaceSupport, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT\n"); + + error = clGetDeviceInfo( + device, CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT, + sizeof(deviceWorkGroupCollectiveFunctionsSupport), + &deviceWorkGroupCollectiveFunctionsSupport, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT\n"); + + error = clGetDeviceInfo(device, CL_DEVICE_PIPE_SUPPORT, + sizeof(devicePipeSupport), &devicePipeSupport, + nullptr); + test_error(error, + "clGetDeviceInfo failed for CL_DEVICE_PIPE_SUPPORT\n"); + + cl_device_device_enqueue_capabilities deviceDeviceEnqueueCapabilities = + 0; + error = clGetDeviceInfo(device, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, + sizeof(deviceDeviceEnqueueCapabilities), + &deviceDeviceEnqueueCapabilities, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES\n"); + + deviceDeviceEnqueueSupport = + deviceDeviceEnqueueCapabilities != 0 ? 
CL_TRUE : CL_FALSE; + } + else if (version >= Version(2, 0)) + { + deviceGenericAddressSpaceSupport = CL_TRUE; + deviceWorkGroupCollectiveFunctionsSupport = CL_TRUE; + devicePipeSupport = CL_TRUE; + deviceDeviceEnqueueSupport = CL_TRUE; + } + + if (is_extension_available(device, "cl_khr_integer_dot_product")) + { + error = clGetDeviceInfo(device, + CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, + sizeof(deviceIntegerDotProductCapabilities), + &deviceIntegerDotProductCapabilities, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR\n"); + } + + if (is_extension_available(device, "cl_ext_float_atomics")) + { + error = + clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, + sizeof(deviceFp32AtomicCapabilities), + &deviceFp32AtomicCapabilities, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT\n"); + + error = + clGetDeviceInfo(device, CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT, + sizeof(deviceFp16AtomicCapabilities), + &deviceFp16AtomicCapabilities, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT\n"); + + error = + clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT, + sizeof(deviceFp64AtomicCapabilities), + &deviceFp64AtomicCapabilities, nullptr); + test_error(error, + "clGetDeviceInfo failed for " + "CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT\n"); + } + + // Required. + extendedInstructionSets.push_back("OpenCL.std"); + + capabilities.push_back(spv::CapabilityAddresses); + capabilities.push_back(spv::CapabilityFloat16Buffer); + capabilities.push_back(spv::CapabilityInt16); + capabilities.push_back(spv::CapabilityInt8); + capabilities.push_back(spv::CapabilityKernel); + capabilities.push_back(spv::CapabilityLinkage); + capabilities.push_back(spv::CapabilityVector16); + + // Required for FULL_PROFILE devices, or devices supporting + // cles_khr_int64. 
+ if (gHasLong) + { + capabilities.push_back(spv::CapabilityInt64); + } + + // Required for devices supporting images. + if (deviceImageSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityImage1D); + capabilities.push_back(spv::CapabilityImageBasic); + capabilities.push_back(spv::CapabilityImageBuffer); + capabilities.push_back(spv::CapabilityLiteralSampler); + capabilities.push_back(spv::CapabilitySampled1D); + capabilities.push_back(spv::CapabilitySampledBuffer); + } + + // Required for devices supporting SPIR-V 1.6. + if (ilVersions.find("SPIR-V_1.6") != std::string::npos) + { + capabilities.push_back(spv::CapabilityUniformDecoration); + } + + // Required for devices supporting read-write images. + if (deviceReadWriteImageSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityImageReadWrite); + } + + // Required for devices supporting the generic address space. + if (deviceGenericAddressSpaceSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityGenericPointer); + } + + // Required for devices supporting sub-groups or work-group collective + // functions. + if (deviceSubGroupsSupport == CL_TRUE + || deviceWorkGroupCollectiveFunctionsSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityGroups); + } + + // Required for devices supporting pipes. + if (devicePipeSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityPipes); + } + + // Required for devices supporting device-side enqueue. + if (deviceDeviceEnqueueSupport == CL_TRUE) + { + capabilities.push_back(spv::CapabilityDeviceEnqueue); + } + + // Required for devices supporting SPIR-V 1.1 and OpenCL 2.2. + if (ilVersions.find("SPIR-V_1.1") != std::string::npos + && version == Version(2, 2)) + { + capabilities.push_back(spv::CapabilityPipeStorage); + } + + // Required for devices supporting SPIR-V 1.1 and either OpenCL 2.2 or + // OpenCL 3.0 devices supporting sub-groups. 
+ if (ilVersions.find("SPIR-V_1.1") != std::string::npos + && (version == Version(2, 2) + || (version >= Version(3, 0) && deviceSubGroupsSupport == CL_TRUE))) + { + capabilities.push_back(spv::CapabilitySubgroupDispatch); + } + + // Required for devices supporting cl_khr_expect_assume. + if (is_extension_available(device, "cl_khr_expect_assume")) + { + extensions.push_back("SPV_KHR_expect_assume"); + capabilities.push_back(spv::CapabilityExpectAssumeKHR); + } + + // Required for devices supporting cl_khr_extended_bit_ops. + if (is_extension_available(device, "cl_khr_extended_bit_ops")) + { + extensions.push_back("SPV_KHR_bit_instructions"); + capabilities.push_back(spv::CapabilityBitInstructions); + } + + // Required for devices supporting half-precision floating-point + // (cl_khr_fp16). + if (is_extension_available(device, "cl_khr_fp16")) + { + capabilities.push_back(spv::CapabilityFloat16); + } + + // Required for devices supporting double-precision floating-point + // (cl_khr_fp64). + if (is_extension_available(device, "cl_khr_fp64")) + { + capabilities.push_back(spv::CapabilityFloat64); + } + + // Required for devices supporting 64-bit atomics + // (cl_khr_int64_base_atomics or cl_khr_int64_extended_atomics). + if (is_extension_available(device, "cl_khr_int64_base_atomics") + || is_extension_available(device, "cl_khr_int64_extended_atomics")) + { + capabilities.push_back(spv::CapabilityInt64Atomics); + } + + // Required for devices supporting cl_khr_integer_dot_product. + if (is_extension_available(device, "cl_khr_integer_dot_product")) + { + extensions.push_back("SPV_KHR_integer_dot_product"); + capabilities.push_back(spv::CapabilityDotProduct); + capabilities.push_back(spv::CapabilityDotProductInput4x8BitPacked); + } + + // Required for devices supporting cl_khr_integer_dot_product and + // CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR. 
+ if (is_extension_available(device, "cl_khr_integer_dot_product") + && (deviceIntegerDotProductCapabilities + & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR)) + { + capabilities.push_back(spv::CapabilityDotProductInput4x8Bit); + } + + // Required for devices supporting cl_khr_kernel_clock. + if (is_extension_available(device, "cl_khr_kernel_clock")) + { + extensions.push_back("SPV_KHR_shader_clock"); + capabilities.push_back(spv::CapabilityShaderClockKHR); + } + + // Required for devices supporting both cl_khr_mipmap_image and + // cl_khr_mipmap_image_writes. + if (is_extension_available(device, "cl_khr_mipmap_image") + && is_extension_available(device, "cl_khr_mipmap_image_writes")) + { + capabilities.push_back(spv::CapabilityImageMipmap); + } + + // Required for devices supporting cl_khr_spirv_extended_debug_info. + if (is_extension_available(device, "cl_khr_spirv_extended_debug_info")) + { + extendedInstructionSets.push_back("OpenCL.DebugInfo.100"); + } + + // Required for devices supporting cl_khr_spirv_linkonce_odr. + if (is_extension_available(device, "cl_khr_spirv_linkonce_odr")) + { + extensions.push_back("SPV_KHR_linkonce_odr"); + } + + // Required for devices supporting + // cl_khr_spirv_no_integer_wrap_decoration. + if (is_extension_available(device, + "cl_khr_spirv_no_integer_wrap_decoration")) + { + extensions.push_back("SPV_KHR_no_integer_wrap_decoration"); + } + + // Required for devices supporting cl_khr_subgroup_ballot. + if (is_extension_available(device, "cl_khr_subgroup_ballot")) + { + capabilities.push_back(spv::CapabilityGroupNonUniformBallot); + } + + // Required for devices supporting cl_khr_subgroup_clustered_reduce. + if (is_extension_available(device, "cl_khr_subgroup_clustered_reduce")) + { + capabilities.push_back(spv::CapabilityGroupNonUniformClustered); + } + + // Required for devices supporting cl_khr_subgroup_named_barrier. 
+ if (is_extension_available(device, "cl_khr_subgroup_named_barrier")) + { + capabilities.push_back(spv::CapabilityNamedBarrier); + } + + // Required for devices supporting + // cl_khr_subgroup_non_uniform_arithmetic. + if (is_extension_available(device, + "cl_khr_subgroup_non_uniform_arithmetic")) + { + capabilities.push_back(spv::CapabilityGroupNonUniformArithmetic); + } + + // Required for devices supporting cl_khr_subgroup_non_uniform_vote. + if (is_extension_available(device, "cl_khr_subgroup_non_uniform_vote")) + { + capabilities.push_back(spv::CapabilityGroupNonUniform); + capabilities.push_back(spv::CapabilityGroupNonUniformVote); + } + + // Required for devices supporting cl_khr_subgroup_rotate. + if (is_extension_available(device, "cl_khr_subgroup_rotate")) + { + extensions.push_back("SPV_KHR_subgroup_rotate"); + capabilities.push_back(spv::CapabilityGroupNonUniformRotateKHR); + } + + // Required for devices supporting cl_khr_subgroup_shuffle. + if (is_extension_available(device, "cl_khr_subgroup_shuffle")) + { + capabilities.push_back(spv::CapabilityGroupNonUniformShuffle); + } + + // Required for devices supporting cl_khr_subgroup_shuffle_relative. + if (is_extension_available(device, "cl_khr_subgroup_shuffle_relative")) + { + capabilities.push_back(spv::CapabilityGroupNonUniformShuffleRelative); + } + + // Required for devices supporting cl_khr_work_group_uniform_arithmetic. + if (is_extension_available(device, "cl_khr_work_group_uniform_arithmetic")) + { + extensions.push_back("SPV_KHR_uniform_group_instructions"); + capabilities.push_back(spv::CapabilityGroupUniformArithmeticKHR); + } + + // Required for devices supporting cl_ext_float_atomics and fp32 atomic + // adds. 
+ if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp32AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))) + { + capabilities.push_back(spv::CapabilityAtomicFloat32AddEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp32 atomic + // min and max. + if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp32AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))) + { + capabilities.push_back(spv::CapabilityAtomicFloat32MinMaxEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp16 atomic + // adds. + if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp16AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))) + { + extensions.push_back("SPV_EXT_shader_atomic_float16_add"); + capabilities.push_back(spv::CapabilityAtomicFloat16AddEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp16 atomic + // min and max. + if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp16AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))) + { + capabilities.push_back(spv::CapabilityAtomicFloat16MinMaxEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp64 atomic + // adds. + if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp64AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))) + { + capabilities.push_back(spv::CapabilityAtomicFloat64AddEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp64 atomic + // min and max. 
+ if (is_extension_available(device, "cl_ext_float_atomics") + && (deviceFp64AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))) + { + capabilities.push_back(spv::CapabilityAtomicFloat64MinMaxEXT); + } + + // Required for devices supporting cl_ext_float_atomics and fp16, fp32, + // or fp64 atomic min or max. + if (is_extension_available(device, "cl_ext_float_atomics") + && ((deviceFp32AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)) + || (deviceFp16AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)) + || (deviceFp64AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))) + { + extensions.push_back("SPV_EXT_shader_atomic_float_min_max"); + } + + // Required for devices supporting cl_ext_float_atomics and fp32 or fp64 + // atomic adds. + if (is_extension_available(device, "cl_ext_float_atomics") + && ((deviceFp32AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)) + || (deviceFp64AtomicCapabilities + & (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT + | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))) + { + extensions.push_back("SPV_EXT_shader_atomic_float_add"); + } + + // Required for devices supporting cl_intel_bfloat16_conversions. + if (is_extension_available(device, "cl_intel_bfloat16_conversions")) + { + extensions.push_back("SPV_INTEL_bfloat16_conversion"); + capabilities.push_back(spv::CapabilityBFloat16ConversionINTEL); + } + + // Required for devices supporting + // cl_intel_spirv_device_side_avc_motion_estimation. 
+ if (is_extension_available( + device, "cl_intel_spirv_device_side_avc_motion_estimation")) + { + extensions.push_back("SPV_INTEL_device_side_avc_motion_estimation"); + capabilities.push_back( + spv::CapabilitySubgroupAvcMotionEstimationChromaINTEL); + capabilities.push_back(spv::CapabilitySubgroupAvcMotionEstimationINTEL); + capabilities.push_back( + spv::CapabilitySubgroupAvcMotionEstimationIntraINTEL); + } + + // Required for devices supporting cl_intel_spirv_media_block_io. + if (is_extension_available(device, "cl_intel_spirv_media_block_io")) + { + extensions.push_back("SPV_INTEL_media_block_io"); + capabilities.push_back(spv::CapabilitySubgroupImageMediaBlockIOINTEL); + } + + // Required for devices supporting cl_intel_spirv_subgroups. + if (is_extension_available(device, "cl_intel_spirv_subgroups")) + { + extensions.push_back("SPV_INTEL_subgroups"); + capabilities.push_back(spv::CapabilitySubgroupBufferBlockIOINTEL); + capabilities.push_back(spv::CapabilitySubgroupImageBlockIOINTEL); + capabilities.push_back(spv::CapabilitySubgroupShuffleINTEL); + } + + // Required for devices supporting cl_intel_split_work_group_barrier. + if (is_extension_available(device, "cl_intel_split_work_group_barrier")) + { + extensions.push_back("SPV_INTEL_split_barrier"); + capabilities.push_back(spv::CapabilitySplitBarrierINTEL); + } + + // Required for devices supporting cl_intel_subgroup_buffer_prefetch. 
+ if (is_extension_available(device, "cl_intel_subgroup_buffer_prefetch")) + { + extensions.push_back("SPV_INTEL_subgroup_buffer_prefetch"); + capabilities.push_back(spv::CapabilitySubgroupBufferPrefetchINTEL); + } + + return CL_SUCCESS; +} + +REGISTER_TEST(spirv_query_requirements) +{ + if (!is_extension_available(device, "cl_khr_spirv_queries")) + { + log_info("cl_khr_spirv_queries is not supported; skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + cl_int error; + + std::vector queriedExtendedInstructionSets; + std::vector queriedExtensions; + std::vector queriedCapabilities; + + error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions, + queriedCapabilities); + test_error_fail(error, "Unable to perform SPIR-V queries"); + + std::vector requiredExtendedInstructionSets; + std::vector requiredExtensions; + std::vector requiredCapabilities; + error = findRequirements(device, requiredExtendedInstructionSets, + requiredExtensions, requiredCapabilities); + test_error_fail(error, "Unable to find SPIR-V requirements"); + + for (auto check : requiredExtendedInstructionSets) + { + auto cmp = [=](const char* queried) { + return strcmp(check, queried) == 0; + }; + auto it = std::find_if(queriedExtendedInstructionSets.begin(), + queriedExtendedInstructionSets.end(), cmp); + if (it == queriedExtendedInstructionSets.end()) + { + test_fail("Missing required extended instruction set: %s\n", check); + } + } + + for (auto check : requiredExtensions) + { + auto cmp = [=](const char* queried) { + return strcmp(check, queried) == 0; + }; + auto it = std::find_if(queriedExtensions.begin(), + queriedExtensions.end(), cmp); + if (it == queriedExtensions.end()) + { + test_fail("Missing required extension: %s\n", check); + } + } + + for (auto check : requiredCapabilities) + { + if (std::find(queriedCapabilities.begin(), queriedCapabilities.end(), + check) + == queriedCapabilities.end()) + { + test_fail( + "Missing required capability: %s\n", + 
spv::CapabilityToString(static_cast(check))); + } + } + + // Find any extraneous capabilities (informational): + for (auto check : queriedCapabilities) + { + if (std::find(requiredCapabilities.begin(), requiredCapabilities.end(), + check) + == requiredCapabilities.end()) + { + log_info( + "Found non-required capability: %s\n", + spv::CapabilityToString(static_cast(check))); + } + } + + return TEST_PASS; +} + +REGISTER_TEST(spirv_query_dependencies) +{ + if (!is_extension_available(device, "cl_khr_spirv_queries")) + { + log_info("cl_khr_spirv_queries is not supported; skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + cl_int error; + + std::vector queriedExtendedInstructionSets; + std::vector queriedExtensions; + std::vector queriedCapabilities; + + error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions, + queriedCapabilities); + test_error_fail(error, "Unable to perform SPIR-V queries"); + + struct CapabilityDependencies + { + std::vector extensions; + std::string version; + }; + + std::map dependencies; + +#define SPIRV_CAPABILITY_VERSION_DEPENDENCY(_cap, _ver) \ + dependencies[spv::Capability##_cap].version = _ver; +#define SPIRV_CAPABILITY_EXTENSION_DEPENDENCY(_cap, _ext) \ + dependencies[spv::Capability##_cap].extensions.push_back(_ext); +#include "spirv_capability_deps.def" + + // For each queried SPIR-V capability, ensure that either its SPIR-V version + // dependency or one of its SPIR-V extension dependencies is satisfied.
+ + for (auto check : queriedCapabilities) + { + // Log and skip any unknown capabilities + auto it = dependencies.find(static_cast(check)); + if (it == dependencies.end()) + { + log_info( + "No known dependencies for queried capability %s!\n", + spv::CapabilityToString(static_cast(check))); + continue; + } + + // Check if a SPIR-V version dependency is satisfied + const auto& version_dep = it->second.version; + if (!version_dep.empty() + && is_spirv_version_supported(device, version_dep)) + { + continue; + } + + // Check if a SPIR-V extension dependency is satisfied + bool found = false; + for (const auto& extension_dep : it->second.extensions) + { + if (std::find(queriedExtensions.begin(), queriedExtensions.end(), + extension_dep) + != queriedExtensions.end()) + { + found = true; + break; + } + } + if (found) + { + continue; + } + + // If we get here then the capability has an unsatisfied dependency. + log_error("Couldn't find a dependency for queried capability %s!\n", + spv::CapabilityToString(static_cast(check))); + if (!version_dep.empty()) + { + log_error("Checked for SPIR-V version %s.\n", version_dep.c_str()); + } + for (const auto& extension_dep : it->second.extensions) + { + log_error("Checked for SPIR-V extension %s.n", + extension_dep.c_str()); + } + return TEST_FAIL; + } + + return TEST_PASS; +} diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp index 70a78f4a..c1b88be5 100644 --- a/test_conformance/api/test_sub_group_dispatch.cpp +++ b/test_conformance/api/test_sub_group_dispatch.cpp @@ -108,7 +108,11 @@ REGISTER_TEST_VERSION(sub_group_dispatch, Version(2, 1)) nullptr); test_error(error, "clGetDeviceInfo failed"); - max_local = max_work_item_sizes[0]; + error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(max_local), &max_local, nullptr); + test_error(error, "clGetKernelWorkGroupInfo failed"); + + max_local = std::min(max_local, max_work_item_sizes[0]); 
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index 7292bc9d..bf1f3bd6 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -1,9 +1,5 @@ set(MODULE_NAME BASIC) -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - add_cxx_flag_if_supported(-Wno-narrowing) -endif() - set(${MODULE_NAME}_SOURCES main.cpp test_fpmath.cpp diff --git a/test_conformance/basic/test_arraycopy.cpp b/test_conformance/basic/test_arraycopy.cpp index a981cd02..332b10e2 100644 --- a/test_conformance/basic/test_arraycopy.cpp +++ b/test_conformance/basic/test_arraycopy.cpp @@ -161,7 +161,7 @@ REGISTER_TEST(arraycopy) err |= clSetKernelArg(kernel, 1, sizeof results, &results); test_error(err, "clSetKernelArg failed"); - size_t threads[3] = { num_elements, 0, 0 }; + size_t threads[3] = { static_cast(num_elements), 0, 0 }; err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); test_error(err, "clEnqueueNDRangeKernel failed"); diff --git a/test_conformance/basic/test_arrayimagecopy.cpp b/test_conformance/basic/test_arrayimagecopy.cpp index 8a8f9381..bb44abff 100644 --- a/test_conformance/basic/test_arrayimagecopy.cpp +++ b/test_conformance/basic/test_arrayimagecopy.cpp @@ -35,9 +35,9 @@ static int test_arrayimagecopy_single_format( std::unique_ptr bufptr{ nullptr, free }, imgptr{ nullptr, free }; clMemWrapper buffer, image; - int img_width = 512; - int img_height = 512; - int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1; + size_t img_width = 512; + size_t img_height = 512; + size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 
32 : 1; size_t elem_size; size_t buffer_size; cl_int err; diff --git a/test_conformance/basic/test_explicit_s2v.cpp b/test_conformance/basic/test_explicit_s2v.cpp index a5ae452f..418e184a 100644 --- a/test_conformance/basic/test_explicit_s2v.cpp +++ b/test_conformance/basic/test_explicit_s2v.cpp @@ -14,7 +14,6 @@ // limitations under the License. // #include -using std::isnan; #include "harness/compat.h" #include @@ -26,6 +25,7 @@ using std::isnan; #include #include "harness/conversions.h" +#include "harness/mathHelpers.h" #include "harness/typeWrappers.h" extern cl_half_rounding_mode halfRoundingMode; @@ -102,16 +102,6 @@ const char * kernel_explicit_s2v_set[NUM_VEC_TYPES][NUM_VEC_TYPES][5] = { // clang-format on -bool IsHalfNaN(cl_half v) -{ - // Extract FP16 exponent and mantissa - uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; - uint16_t h_mant = ((cl_half)v) & 0x3FF; - - // NaN test - return (h_exp == 0x1F && h_mant != 0); -} - static int test_explicit_s2v_function(cl_context context, cl_command_queue queue, cl_kernel kernel, ExplicitType srcType, unsigned int count, @@ -183,20 +173,21 @@ static int test_explicit_s2v_function(cl_context context, { bool isSrcNaN = (((srcType == kHalf) - && IsHalfNaN(*reinterpret_cast(inPtr))) + && isnan_fp(*reinterpret_cast(inPtr))) || ((srcType == kFloat) - && isnan(*reinterpret_cast(inPtr))) + && isnan_fp(*reinterpret_cast(inPtr))) || ((srcType == kDouble) - && isnan(*reinterpret_cast(inPtr)))); - bool isDestNaN = (((destType == kHalf) - && IsHalfNaN(*reinterpret_cast( - outPtr + destTypeSize * s))) - || ((destType == kFloat) - && isnan(*reinterpret_cast( - outPtr + destTypeSize * s))) - || ((destType == kDouble) - && isnan(*reinterpret_cast( - outPtr + destTypeSize * s)))); + && isnan_fp(*reinterpret_cast(inPtr)))); + bool isDestNaN = + (((destType == kHalf) + && isnan_fp(*reinterpret_cast( + outPtr + destTypeSize * s))) + || ((destType == kFloat) + && isnan_fp(*reinterpret_cast( + outPtr + destTypeSize * 
s))) + || ((destType == kDouble) + && isnan_fp(*reinterpret_cast( + outPtr + destTypeSize * s)))); if (isSrcNaN && isDestNaN) { diff --git a/test_conformance/basic/test_fpmath.cpp b/test_conformance/basic/test_fpmath.cpp index c39a2fec..f8f39ae0 100644 --- a/test_conformance/basic/test_fpmath.cpp +++ b/test_conformance/basic/test_fpmath.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/mathHelpers.h" #include "harness/rounding_mode.h" #include "harness/stringHelpers.h" @@ -57,16 +58,6 @@ template double toDouble(T val) return val; } -bool isHalfNan(cl_half v) -{ - // Extract FP16 exponent and mantissa - uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F; - uint16_t h_mant = v & 0x3FF; - - // NaN test - return (h_exp == 0x1F && h_mant != 0); -} - cl_half half_plus(cl_half a, cl_half b) { return HFF(std::plus()(HTF(a), HTF(b))); @@ -101,14 +92,7 @@ int verify_fp(std::vector (&input)[2], std::vector &output, T r = test.ref(inA[i], inB[i]); bool both_nan = false; - if (std::is_same::value) - { - both_nan = isHalfNan(r) && isHalfNan(output[i]); - } - else if (std::is_floating_point::value) - { - both_nan = std::isnan(r) && std::isnan(output[i]); - } + both_nan = isnan_fp(r) && isnan_fp(output[i]); // If not both nan, check if the result is the same if (!both_nan && (r != output[i])) diff --git a/test_conformance/basic/test_hostptr.cpp b/test_conformance/basic/test_hostptr.cpp index e58b636e..9f3f700e 100644 --- a/test_conformance/basic/test_hostptr.cpp +++ b/test_conformance/basic/test_hostptr.cpp @@ -100,8 +100,8 @@ REGISTER_TEST(hostptr) cl_image_format img_format; cl_uchar *rgba8_inptr, *rgba8_outptr; void *lock_buffer; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; cl_int err; MTdata d; RoundingMode oldRoundMode; diff --git a/test_conformance/basic/test_if.cpp b/test_conformance/basic/test_if.cpp index a0640879..b6b8c449 100644 --- 
a/test_conformance/basic/test_if.cpp +++ b/test_conformance/basic/test_if.cpp @@ -56,8 +56,9 @@ __kernel void test_if(__global int *src, __global int *dst) int verify_if(std::vector input, std::vector output) { const cl_int results[] = { - 0x12345678, 0x23456781, 0x34567812, 0x45678123, - 0x56781234, 0x67812345, 0x78123456, 0x81234567, + (cl_int)0x12345678, (cl_int)0x23456781, (cl_int)0x34567812, + (cl_int)0x45678123, (cl_int)0x56781234, (cl_int)0x67812345, + (cl_int)0x78123456, (cl_int)0x81234567, }; auto predicate = [&results](cl_int a, cl_int b) { diff --git a/test_conformance/basic/test_image_multipass.cpp b/test_conformance/basic/test_image_multipass.cpp index 5d8ae993..e7af8726 100644 --- a/test_conformance/basic/test_image_multipass.cpp +++ b/test_conformance/basic/test_image_multipass.cpp @@ -144,8 +144,8 @@ verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int REGISTER_TEST(image_multipass_integer_coord) { - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; cl_image_format img_format; int num_input_streams = 8; @@ -397,8 +397,8 @@ REGISTER_TEST(image_multipass_integer_coord) REGISTER_TEST(image_multipass_float_coord) { - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; cl_image_format img_format; int num_input_streams = 8; diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp index 7f177ff4..a400c460 100644 --- a/test_conformance/basic/test_imagearraycopy.cpp +++ b/test_conformance/basic/test_imagearraycopy.cpp @@ -35,9 +35,9 @@ static int test_imagearraycopy_single_format( std::unique_ptr bufptr{ nullptr, free }, imgptr{ nullptr, free }; clMemWrapper buffer, image; - const int img_width = 512; - const int img_height = 512; - const int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 
32 : 1; + const size_t img_width = 512; + const size_t img_height = 512; + const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1; size_t elem_size; size_t buffer_size; cl_int err; diff --git a/test_conformance/basic/test_imagecopy.cpp b/test_conformance/basic/test_imagecopy.cpp index 22bdea11..5e888594 100644 --- a/test_conformance/basic/test_imagecopy.cpp +++ b/test_conformance/basic/test_imagecopy.cpp @@ -111,8 +111,8 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context, std::unique_ptr rgba16_inptr, rgba16_outptr; std::unique_ptr rgbafp_inptr, rgbafp_outptr; clMemWrapper streams[6]; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; int i, err; MTdataHolder d(gRandomSeed); @@ -153,7 +153,7 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context, for (i = 0; i < 3; i++) { void *p, *outp; - int x, y, delta_w = img_width / 8, delta_h = img_height / 16; + size_t x, y, delta_w = img_width / 8, delta_h = img_height / 16; switch (i) { @@ -197,10 +197,11 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context, copy_origin, copy_region, 0, NULL, NULL); if (err) { - log_error("Copy %d (origin [%d, %d], size [%d, %d], image " - "size [%d x %d]) Failed\n", - copy_number, x, y, delta_w, delta_h, img_width, - img_height); + log_error( + "Copy %d (origin [%zu, %zu], size [%zu, %zu], image " + "size [%zu x %zu]) Failed\n", + copy_number, x, y, delta_w, delta_h, img_width, + img_height); } test_error(err, "clEnqueueCopyImage failed"); } diff --git a/test_conformance/basic/test_imagecopy3d.cpp b/test_conformance/basic/test_imagecopy3d.cpp index 5de2e3ae..53a88bd5 100644 --- a/test_conformance/basic/test_imagecopy3d.cpp +++ b/test_conformance/basic/test_imagecopy3d.cpp @@ -115,9 +115,9 @@ static int test_imagecopy3d_impl(cl_device_id device, cl_context context, std::unique_ptr rgba16_inptr, rgba16_outptr; std::unique_ptr rgbafp_inptr, rgbafp_outptr; 
clMemWrapper streams[6]; - int img_width = 128; - int img_height = 128; - int img_depth = 64; + size_t img_width = 128; + size_t img_height = 128; + size_t img_depth = 64; int i; cl_int err; unsigned num_elements = img_width * img_height * img_depth * 4; diff --git a/test_conformance/basic/test_imagenpot.cpp b/test_conformance/basic/test_imagenpot.cpp index 1e2c213e..566cb9c3 100644 --- a/test_conformance/basic/test_imagenpot.cpp +++ b/test_conformance/basic/test_imagenpot.cpp @@ -82,8 +82,8 @@ REGISTER_TEST(imagenpot) cl_kernel kernel; size_t global_threads[3], local_threads[3]; size_t local_workgroup_size; - int img_width; - int img_height; + size_t img_width; + size_t img_height; int err; cl_uint m; size_t max_local_workgroup_size[3]; diff --git a/test_conformance/basic/test_imagerandomcopy.cpp b/test_conformance/basic/test_imagerandomcopy.cpp index 79e6b749..748da6fb 100644 --- a/test_conformance/basic/test_imagerandomcopy.cpp +++ b/test_conformance/basic/test_imagerandomcopy.cpp @@ -123,15 +123,15 @@ REGISTER_TEST(imagerandomcopy) unsigned short *rgba16_inptr, *rgba16_outptr; float *rgbafp_inptr, *rgbafp_outptr; clMemWrapper streams[6]; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; int i, j; cl_int err; MTdata d; PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) - log_info("Testing with image %d x %d.\n", img_width, img_height); + log_info("Testing with image %zu x %zu.\n", img_width, img_height); d = init_genrand( gRandomSeed ); rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d); @@ -191,8 +191,8 @@ REGISTER_TEST(imagerandomcopy) } size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1}; - err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL); -// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL); + err = clEnqueueWriteImage(queue, streams[i * 2], CL_TRUE, origin, + region, 0, 0, p, 0, 
NULL, NULL); test_error(err, "clEnqueueWriteImage failed"); for (j=0; j rgba16_inptr, rgba16_outptr; std::unique_ptr rgbafp_inptr, rgbafp_outptr; clMemWrapper streams[3]; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; int num_tries = 200; int i, j, err; MTdataHolder d(gRandomSeed); @@ -242,10 +242,10 @@ REGISTER_TEST(imagereadwrite) for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++) { - int x = (int)get_random_float(0, img_width, d); - int y = (int)get_random_float(0, img_height, d); - int w = (int)get_random_float(1, (img_width - x), d); - int h = (int)get_random_float(1, (img_height - y), d); + size_t x = (size_t)get_random_float(0, img_width, d); + size_t y = (size_t)get_random_float(0, img_height, d); + size_t w = (size_t)get_random_float(1, (img_width - x), d); + size_t h = (size_t)get_random_float(1, (img_height - y), d); size_t input_pitch; int set_input_pitch = (int)(genrand_int32(d) & 0x01); int packed_update = (int)(genrand_int32(d) & 0x01); @@ -386,7 +386,8 @@ REGISTER_TEST(imagereadwrite) img_width, img_height); if (err) { - log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i); + log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x, + y, w, h, (int)input_pitch, (int)i); log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name); } break; @@ -396,7 +397,8 @@ REGISTER_TEST(imagereadwrite) img_width, img_height); if (err) { - log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i); + log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x, + y, w, h, (int)input_pitch, (int)i); log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name); } break; @@ -406,7 +408,8 @@ REGISTER_TEST(imagereadwrite) img_width, img_height); if (err) { - log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i); + log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, 
try=%d\n", x, + y, w, h, (int)input_pitch, (int)i); log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name); } break; diff --git a/test_conformance/basic/test_imagereadwrite3d.cpp b/test_conformance/basic/test_imagereadwrite3d.cpp index f384c339..6fb220b8 100644 --- a/test_conformance/basic/test_imagereadwrite3d.cpp +++ b/test_conformance/basic/test_imagereadwrite3d.cpp @@ -205,10 +205,10 @@ REGISTER_TEST(imagereadwrite3d) std::unique_ptr rgba16_inptr, rgba16_outptr; std::unique_ptr rgbafp_inptr, rgbafp_outptr; clMemWrapper streams[3]; - int img_width = 64; - int img_height = 64; - int img_depth = 32; - int img_slice = img_width * img_height; + size_t img_width = 64; + size_t img_height = 64; + size_t img_depth = 32; + size_t img_slice = img_width * img_height; int num_tries = 30; int i, j, err; MTdataHolder mtData(gRandomSeed); @@ -257,12 +257,12 @@ REGISTER_TEST(imagereadwrite3d) for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++) { - int x = (int)get_random_float(0, (float)img_width - 1, mtData); - int y = (int)get_random_float(0, (float)img_height - 1, mtData); - int z = (int)get_random_float(0, (float)img_depth - 1, mtData); - int w = (int)get_random_float(1, (float)(img_width - x), mtData); - int h = (int)get_random_float(1, (float)(img_height - y), mtData); - int d = (int)get_random_float(1, (float)(img_depth - z), mtData); + size_t x = (size_t)get_random_float(0, (float)img_width - 1, mtData); + size_t y = (size_t)get_random_float(0, (float)img_height - 1, mtData); + size_t z = (size_t)get_random_float(0, (float)img_depth - 1, mtData); + size_t w = (size_t)get_random_float(1, (float)(img_width - x), mtData); + size_t h = (size_t)get_random_float(1, (float)(img_height - y), mtData); + size_t d = (size_t)get_random_float(1, (float)(img_depth - z), mtData); size_t input_pitch, input_slice_pitch; int set_input_pitch = (int)(genrand_int32(mtData) & 0x01); int packed_update = (int)(genrand_int32(mtData) & 0x01); @@ -401,7 
+401,10 @@ REGISTER_TEST(imagereadwrite3d) img_width, img_height, img_depth); if (err) { - log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i); + log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, " + "slice_pitch=%d, try=%d\n", + x, y, z, w, h, d, (int)input_pitch, + (int)input_slice_pitch, (int)i); log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name); } break; @@ -411,7 +414,10 @@ REGISTER_TEST(imagereadwrite3d) img_width, img_height, img_depth); if (err) { - log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i); + log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, " + "slice_pitch=%d, try=%d\n", + x, y, z, w, h, d, (int)input_pitch, + (int)input_slice_pitch, (int)i); log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name); } break; @@ -421,7 +427,10 @@ REGISTER_TEST(imagereadwrite3d) img_width, img_height, img_depth); if (err) { - log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i); + log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, " + "slice_pitch=%d, try=%d\n", + x, y, z, w, h, d, (int)input_pitch, + (int)input_slice_pitch, (int)i); log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name); } break; diff --git a/test_conformance/basic/test_kernel_call_kernel_function.cpp b/test_conformance/basic/test_kernel_call_kernel_function.cpp index 0669ee24..82cb1a65 100644 --- a/test_conformance/basic/test_kernel_call_kernel_function.cpp +++ b/test_conformance/basic/test_kernel_call_kernel_function.cpp @@ -66,7 +66,7 @@ REGISTER_TEST(kernel_call_kernel_function) clKernelWrapper kernel1, kernel2, kernel_to_call; clMemWrapper streams[2]; - size_t threads[] = {num_elements,1,1}; + size_t threads[] = { 
static_cast(num_elements), 1, 1 }; cl_int *input, *output, *expected; cl_int times = 4; int pass = 0; diff --git a/test_conformance/basic/test_multireadimagemultifmt.cpp b/test_conformance/basic/test_multireadimagemultifmt.cpp index b92daf88..8a16ca85 100644 --- a/test_conformance/basic/test_multireadimagemultifmt.cpp +++ b/test_conformance/basic/test_multireadimagemultifmt.cpp @@ -117,8 +117,8 @@ REGISTER_TEST(mri_multiple) cl_program program; cl_kernel kernel; size_t threads[2]; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; int i, err; MTdata d; diff --git a/test_conformance/basic/test_multireadimageonefmt.cpp b/test_conformance/basic/test_multireadimageonefmt.cpp index 1d0b5b8d..8a37e29b 100644 --- a/test_conformance/basic/test_multireadimageonefmt.cpp +++ b/test_conformance/basic/test_multireadimageonefmt.cpp @@ -100,8 +100,8 @@ REGISTER_TEST(mri_one) cl_program program; cl_kernel kernel; size_t threads[2]; - int img_width = 512; - int img_height = 512; + size_t img_width = 512; + size_t img_height = 512; int i, err; size_t origin[3] = {0, 0, 0}; size_t region[3] = {img_width, img_height, 1}; diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index 15b4df43..b555b33d 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -1264,7 +1264,7 @@ static int l_write_read_for_type(cl_device_id device, cl_context context, } cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer( - queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, + queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0, 0, 0); memset(read_data, -1, read_data_size); clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); @@ -1503,7 +1503,7 @@ static int l_init_write_read_for_type(cl_device_id device, cl_context context, clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0); cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer( - 
queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, + queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0, 0, 0); memset(read_data, -1, read_data_size); clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); diff --git a/test_conformance/basic/test_work_item_functions.cpp b/test_conformance/basic/test_work_item_functions.cpp index 046640b3..6098374c 100644 --- a/test_conformance/basic/test_work_item_functions.cpp +++ b/test_conformance/basic/test_work_item_functions.cpp @@ -476,7 +476,7 @@ struct TestWorkItemFnsOutOfRange maxWorkItemSizes[2] }; // check if maximum work group size for current dimention is not // exceeded - cl_uint work_group_size = max_workgroup_size + 1; + size_t work_group_size = max_workgroup_size + 1; while (max_workgroup_size < work_group_size && work_group_size != 1) { work_group_size = 1; @@ -492,9 +492,9 @@ struct TestWorkItemFnsOutOfRange // compute max number of work groups based on buffer size and max // group size - cl_uint max_work_groups = testData.size() / work_group_size; + size_t max_work_groups = testData.size() / work_group_size; // take into account number of dimentions - cl_uint work_groups_per_dim = + size_t work_groups_per_dim = std::max(1, (int)pow(max_work_groups, 1.f / dim)); for (size_t j = 0; j < dim; j++) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 3cab98ce..d321819f 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -24,8 +24,9 @@ #include "CL/cl_half.h" -#include +#include #include +#include #define MAX_DEVICE_THREADS (gHost ? 
0U : gMaxDeviceThreads) #define MAX_HOST_THREADS GetThreadCount() @@ -74,9 +75,11 @@ extern int gMaxDeviceThreads; // maximum number of threads executed on OCL device extern cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device + extern cl_half_rounding_mode gHalfRoundingMode; extern bool gFloatAtomicsSupported; extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps; +extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps; extern const char * get_memory_order_type_name(TExplicitMemoryOrderType orderType); @@ -174,6 +177,13 @@ public: { return false; } + virtual bool + IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) + { + return expected != testValues[whichDestValue]; + } virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) { @@ -883,14 +893,15 @@ CBasicTest::ProgramHeader(cl_uint maxNumDestItems) header += std::string("__global volatile ") + aTypeName + " destMemory[" + ss.str() + "] = {\n"; ss.str(""); - if (CBasicTest::DataType()._type - != TYPE_ATOMIC_HALF) - ss << _startValue; - else + == TYPE_ATOMIC_FLOAT) + ss << std::setprecision(10) << _startValue; + else if (CBasicTest::DataType()._type + == TYPE_ATOMIC_HALF) ss << static_cast( cl_half_to_float(static_cast(_startValue))); - + else + ss << _startValue; for (cl_uint i = 0; i < maxNumDestItems; i++) { if (aTypeName == "atomic_flag") @@ -1449,7 +1460,7 @@ int CBasicTest::ExecuteSingleTest( startRefValues.size() ? 
&startRefValues[0] : 0, i)) break; // no expected value function provided - if (expected != destItems[i]) + if (IsTestNotAsExpected(expected, destItems, i)) { std::stringstream logLine; logLine << "ERROR: Result " << i diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index e5b1d328..d9482fb7 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -17,6 +17,9 @@ #define HOST_ATOMICS_H_ #include "harness/testHarness.h" +#include + +#include "CL/cl_half.h" #ifdef WIN32 #include "Windows.h" @@ -87,6 +90,8 @@ enum TExplicitMemoryOrderType #define HOST_FLAG cl_int +extern cl_half_rounding_mode gHalfRoundingMode; + // host atomic functions void host_atomic_thread_fence(TExplicitMemoryOrderType order); @@ -94,28 +99,51 @@ template CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { + if constexpr (std::is_same_v) + { + static std::mutex mx; + std::lock_guard lock(mx); + CorrespondingType old_value = *a; + *a += c; + return old_value; + } + else + { #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - return InterlockedExchangeAdd(a, c); + return InterlockedExchangeAdd(a, c); #elif defined(__GNUC__) - return __sync_fetch_and_add(a, c); + return __sync_fetch_and_add(a, c); #else - log_info("Host function not implemented: atomic_fetch_add\n"); - return 0; + log_info("Host function not implemented: atomic_fetch_add\n"); + return 0; #endif + } } template CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { -#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - return InterlockedExchangeSubtract(a, c); + if constexpr (std::is_same_v) + { + static std::mutex mx; + std::lock_guard lock(mx); + CorrespondingType old_value = *a; + *a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)), + 
gHalfRoundingMode); + return old_value; + } + else + { +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + return InterlockedExchangeSubtract(a, c); #elif defined(__GNUC__) - return __sync_fetch_and_sub(a, c); + return __sync_fetch_and_sub(a, c); #else - log_info("Host function not implemented: atomic_fetch_sub\n"); - return 0; + log_info("Host function not implemented: atomic_fetch_sub\n"); + return 0; #endif + } } template @@ -144,19 +172,34 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp TExplicitMemoryOrderType order_success, TExplicitMemoryOrderType order_failure) { - CorrespondingType tmp; -#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - tmp = InterlockedCompareExchange(a, desired, *expected); + CorrespondingType tmp; + if constexpr (std::is_same_v) + { + static std::mutex mtx; + std::lock_guard lock(mtx); + tmp = *reinterpret_cast(a); + if (tmp == *expected) + { + *reinterpret_cast(a) = desired; + return true; + } + *expected = tmp; + } + else + { +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + + tmp = InterlockedCompareExchange(a, desired, *expected); #elif defined(__GNUC__) - tmp = __sync_val_compare_and_swap(a, *expected, desired); + tmp = __sync_val_compare_and_swap(a, *expected, desired); #else - log_info("Host function not implemented: atomic_compare_exchange\n"); - tmp = 0; + log_info("Host function not implemented: atomic_compare_exchange\n"); + tmp = 0; #endif - if(tmp == *expected) - return true; - *expected = tmp; - return false; + if (tmp == *expected) return true; + *expected = tmp; + } + return false; } template diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index 40972b26..485445f7 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap, cl_half_rounding_mode gHalfRoundingMode = 
CL_HALF_RTE; bool gFloatAtomicsSupported = false; cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; +cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0; test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); @@ -132,6 +133,12 @@ test_status InitCL(cl_device_id device) { if (is_extension_available(device, "cl_ext_float_atomics")) { gFloatAtomicsSupported = true; + + cl_int error = clGetDeviceInfo( + device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, + sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr); + test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); + if (is_extension_available(device, "cl_khr_fp16")) { cl_int error = clGetDeviceInfo( diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index a08a0daf..b51f4461 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -16,10 +16,13 @@ #include "harness/testHarness.h" #include "harness/kernelHelpers.h" #include "harness/typeWrappers.h" +#include "harness/conversions.h" #include "common.h" #include "host_atomics.h" +#include +#include #include #include @@ -1163,61 +1166,233 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak) template class CBasicTestFetchAdd : public CBasicTestMemOrderScope { + + double min_range; + double max_range; + double max_error_fp32; + std::vector ref_vals; + public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) - {} - virtual std::string ProgramCore() + useSVM), + min_range(-999.0), max_range(999.0), max_error_fp32(0.0) + { + if constexpr (std::is_same_v) + { + StartValue(0.f); + CBasicTestMemOrderScope::OldValueCheck(false); + } + } + 
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + if (threadCount > ref_vals.size()) + { + ref_vals.resize(threadCount); + + for (cl_uint i = 0; i < threadCount; i++) + ref_vals[i] = get_random_float(min_range, max_range, d); + + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * ref_vals.size()); + + // Estimate highest possible summation error for given set. + std::vector sums; + std::sort(ref_vals.begin(), ref_vals.end()); + + sums.push_back( + std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f)); + + sums.push_back( + std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); + + std::sort( + ref_vals.begin(), ref_vals.end(), + [](float a, float b) { return std::abs(a) < std::abs(b); }); + + double precise = 0.0; + for (auto elem : ref_vals) precise += double(elem); + sums.push_back(precise); + + sums.push_back( + std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f)); + + sums.push_back( + std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); + + std::sort(sums.begin(), sums.end()); + max_error_fp32 = + std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back()); + + // restore unsorted order + memcpy(ref_vals.data(), startRefValues, + sizeof(HostDataType) * ref_vals.size()); + } + else + { + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * threadCount); + } + return true; + } + return false; + } + std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return " oldValues[tid] = atomic_fetch_add" + postfix - + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() - + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add" - + postfix + "(&destMemory[0], (" - + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope - + ");\n" - " atomic_fetch_add" - + postfix + "(&destMemory[0], (" - + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope - + ");\n" - " atomic_fetch_add" - + postfix + "(&destMemory[0], ((" - + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" - + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope - + ");\n"; + + if constexpr (std::is_same_v) + { + return " atomic_fetch_add" + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")oldValues[tid]" + + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_add" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_add" + postfix + + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")tid + 3" + + memoryOrderScope + + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")tid + 3" + + memoryOrderScope + + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], ((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8" + + memoryOrderScope + ");\n"; + } } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override { - oldValues[tid] = host_atomic_fetch_add( - &destMemory[0], 
(HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, - MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, - MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], - ((HostDataType)tid + 3) - << (sizeof(HostDataType) - 1) * 8, - MemoryOrder()); + if constexpr (std::is_same_v) + { + host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_add( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = host_atomic_fetch_add( + &destMemory[0], (HostDataType)tid + 3, MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, + MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, + MemoryOrder()); + host_atomic_fetch_add( + &destMemory[0], + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), + MemoryOrder()); + } } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for (cl_uint i = 0; i < threadCount; i++) - expected += ((HostDataType)i + 3) * 3 - + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + for (cl_uint i = 0; i < threadCount; i++) + expected += startRefValues[i]; + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + expected += ((HostDataType)i + 3) * 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + } + return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if (std::is_same::value) + { + if (whichDestValue == 0) + return std::abs((HOST_ATOMIC_FLOAT)expected + - testValues[whichDestValue]) + > max_error_fp32; + } + 
return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + if (refValues[i] != StartValue()) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return !correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_add_generic(cl_device_id deviceID, @@ -1242,6 +1417,15 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + if (gFloatAtomicsSupported) + { + CBasicTestFetchAdd test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, test_float.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchAdd @@ -1304,46 +1488,239 @@ REGISTER_TEST(svm_atomic_fetch_add) template class CBasicTestFetchSub : public CBasicTestMemOrderScope { + + 
double min_range; + double max_range; + double max_error; + std::vector ref_vals; + public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) - {} - virtual std::string ProgramCore() + useSVM), + min_range(-999.0), max_range(999.0), max_error(0.0) + { + if constexpr (std::is_same_v) + { + StartValue(0); + CBasicTestMemOrderScope::OldValueCheck(false); + } + } + template + float subtract_halfs(Iterator begin, Iterator end) + { + cl_half res = 0; + for (auto it = begin; it != end; ++it) + { + res = cl_half_from_float(cl_half_to_float(res) + - cl_half_to_float(*it), + gHalfRoundingMode); + } + return cl_half_to_float(res); + } + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + if (threadCount > ref_vals.size()) + { + ref_vals.resize(threadCount); + + for (cl_uint i = 0; i < threadCount; i++) + ref_vals[i] = cl_half_from_float( + get_random_float(min_range, max_range, d), + gHalfRoundingMode); + + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * ref_vals.size()); + + // Estimate highest possible summation error for given set. 
+ std::vector sums; + std::sort(ref_vals.begin(), ref_vals.end(), + [](cl_half a, cl_half b) { + return cl_half_to_float(a) < cl_half_to_float(b); + }); + + sums.push_back( + subtract_halfs(ref_vals.begin(), ref_vals.end())); + sums.push_back( + subtract_halfs(ref_vals.rbegin(), ref_vals.rend())); + + std::sort(ref_vals.begin(), ref_vals.end(), + [](cl_half a, cl_half b) { + return std::abs(cl_half_to_float(a)) + < std::abs(cl_half_to_float(b)); + }); + + float precise = 0.f; + for (auto elem : ref_vals) precise -= cl_half_to_float(elem); + sums.push_back(precise); + + sums.push_back( + subtract_halfs(ref_vals.begin(), ref_vals.end())); + sums.push_back( + subtract_halfs(ref_vals.rbegin(), ref_vals.rend())); + + std::sort(sums.begin(), sums.end()); + max_error = std::abs(sums.front() - sums.back()); + + // restore unsorted order + memcpy(ref_vals.data(), startRefValues, + sizeof(HostDataType) * ref_vals.size()); + } + else + { + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * threadCount); + } + return true; + } + return false; + } + std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return " oldValues[tid] = atomic_fetch_sub" + postfix - + "(&destMemory[0], tid + 3 +(((" - + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" - + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope - + ");\n"; + + if constexpr (std::is_same_v) + { + return " atomic_fetch_sub" + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")oldValues[tid]" + + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_sub" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_sub" + postfix + + "(&destMemory[0], tid + 3 +(((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8)" + + memoryOrderScope + ");\n"; + } } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override { - oldValues[tid] = host_atomic_fetch_sub( - &destMemory[0], - (HostDataType)tid + 3 - + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), - MemoryOrder()); + if constexpr (std::is_same_v) + { + host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_sub( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = + host_atomic_fetch_sub(&destMemory[0], + (HostDataType)tid + 3 + + (((HostDataType)tid + 3) + << (sizeof(HostDataType) - 1) * 8), + MemoryOrder()); + } } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for 
(cl_uint i = 0; i < threadCount; i++) - expected -= (HostDataType)i + 3 - + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + { + for (cl_uint i = 0; i < threadCount; i++) + { + expected = cl_half_from_float( + cl_half_to_float(expected) + - cl_half_to_float(startRefValues[i]), + gHalfRoundingMode); + } + } + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + expected -= (HostDataType)i + 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + } return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + return std::abs(cl_half_to_float(expected) + - cl_half_to_float(testValues[whichDestValue])) + > max_error; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + if (refValues[i] != StartValue()) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return !correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint 
NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_sub_generic(cl_device_id deviceID, @@ -1368,6 +1745,15 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + if (gFloatAtomicsSupported) + { + CBasicTestFetchSub test_half( + TYPE_ATOMIC_HALF, useSVM); + EXECUTE_TEST(error, + test_half.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchSub @@ -2222,54 +2608,178 @@ REGISTER_TEST(svm_atomic_fetch_xor2) template class CBasicTestFetchMin : public CBasicTestMemOrderScope { + double min_range; + double max_range; + public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) + useSVM), + min_range(-999.0), max_range(999.0) { StartValue(DataType().MaxValue()); + if constexpr (std::is_same_v) + { + CBasicTestMemOrderScope::OldValueCheck(false); + } } - virtual std::string ProgramCore() + std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return " oldValues[tid] = atomic_fetch_min" + postfix - + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], - MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, - MTdata d) - { - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - startRefValues[i] = genrand_int32(d); - if (sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + return " atomic_fetch_min" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_min" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_min" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + + ");\n"; + } + } + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override + { + if constexpr (std::is_same_v) + { + host_atomic_fetch_min(&destMemory[0], oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_min( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = host_atomic_fetch_min( + &destMemory[0], oldValues[tid], MemoryOrder()); + } + } + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = get_random_float(min_range, max_range, d); + } + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + { + cl_ulong v = startRefValues[i]; + v |= (cl_ulong)genrand_int32(d) 
<< 16; + startRefValues[i] = v; + } + } } return true; } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - if (startRefValues[i] < expected) expected = startRefValues[i]; + if (whichDestValue == 0) + for (cl_uint i = 0; i < threadCount; i++) + if (startRefValues[i] < expected) + expected = startRefValues[i]; + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] < expected) expected = startRefValues[i]; + } } return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if (std::is_same::value) + { + if (whichDestValue == 0) + return CBasicTestMemOrderScope:: + IsTestNotAsExpected(expected, testValues, whichDestValue); + return false; // ignore all but 0 which stores final result + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + if (refValues[i] != StartValue()) /* only entries that differ from the start value count as mismatches */ + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return !correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + 
return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_min_generic(cl_device_id deviceID, @@ -2294,6 +2804,15 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + if (gFloatAtomicsSupported) + { + CBasicTestFetchMin test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, test_float.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchMin @@ -2356,54 +2875,178 @@ REGISTER_TEST(svm_atomic_fetch_min) template class CBasicTestFetchMax : public CBasicTestMemOrderScope { + double min_range; + double max_range; + public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) + useSVM), + min_range(-999.0), max_range(999.0) { StartValue(DataType().MinValue()); + if constexpr (std::is_same_v) + { + CBasicTestMemOrderScope::OldValueCheck(false); + } } - virtual std::string ProgramCore() + std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return " oldValues[tid] = atomic_fetch_max" + postfix - + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], - MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, - MTdata d) - { - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - startRefValues[i] = genrand_int32(d); - if (sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + return " atomic_fetch_max" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_max" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_max" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + + ");\n"; + } + } + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override + { + if constexpr (std::is_same_v) + { + host_atomic_fetch_max(&destMemory[0], oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_max( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = host_atomic_fetch_max( + &destMemory[0], oldValues[tid], MemoryOrder()); + } + } + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = get_random_float(min_range, max_range, d); + } + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + { + cl_ulong v = startRefValues[i]; + v |= (cl_ulong)genrand_int32(d) 
<< 16; + startRefValues[i] = v; + } + } } return true; } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - if (startRefValues[i] > expected) expected = startRefValues[i]; + if (whichDestValue == 0) + for (cl_uint i = 0; i < threadCount; i++) + if (startRefValues[i] > expected) + expected = startRefValues[i]; + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] > expected) expected = startRefValues[i]; + } } return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if (std::is_same::value) + { + if (whichDestValue == 0) + return CBasicTestMemOrderScope:: + IsTestNotAsExpected(expected, testValues, whichDestValue); + return false; // ignore all but 0 which stores final result + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + if (refValues[i] != StartValue()) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return !correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 
0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_max_generic(cl_device_id deviceID, @@ -2428,6 +3071,15 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + if (gFloatAtomicsSupported) + { + CBasicTestFetchMax test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, test_float.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchMax diff --git a/test_conformance/common/vulkan_wrapper/CMakeLists.txt b/test_conformance/common/vulkan_wrapper/CMakeLists.txt index 42397998..f392f9c4 100644 --- a/test_conformance/common/vulkan_wrapper/CMakeLists.txt +++ b/test_conformance/common/vulkan_wrapper/CMakeLists.txt @@ -8,7 +8,6 @@ set(VULKAN_WRAPPER_SOURCES # needed by Vulkan wrapper to compile set(CMAKE_COMPILE_WARNING_AS_ERROR OFF) add_cxx_flag_if_supported(-Wmisleading-indentation) -add_cxx_flag_if_supported(-Wno-narrowing) add_cxx_flag_if_supported(-Wno-format) add_cxx_flag_if_supported(-Wno-error) add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp index f4245703..4d62a82f 100644 --- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp @@ -863,7 +863,7 @@ 
clExternalMemoryImage::clExternalMemoryImage( size_t clImageFormatSize; cl_image_desc image_desc; memset(&image_desc, 0x0, sizeof(cl_image_desc)); - cl_image_format img_format = { 0 }; + img_format = { 0 }; const VkImageCreateInfo VulkanImageCreateInfo = image2D.getVkImageCreateInfo(); @@ -1233,7 +1233,7 @@ int clExternalExportableSemaphore::signal(cl_command_queue cmd_queue) import.fd = fd; import.pNext = nullptr; import.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR; - import.flags = 0; + import.flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT; VkResult res = vkImportSemaphoreFdKHR(m_deviceSemaphore.getDevice(), &import); diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp index f9a305e1..0bf89322 100644 --- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp +++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp @@ -106,6 +106,7 @@ protected: cl_mem m_externalMemory; int fd; void *handle; + cl_image_format img_format; clExternalMemoryImage(); public: @@ -117,6 +118,7 @@ public: cl_device_id deviceId); virtual ~clExternalMemoryImage(); cl_mem getExternalMemoryImage(); + cl_image_format getImageFormat() { return img_format; }; }; class clExternalSemaphore { diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp index 75aa536d..3773944b 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp @@ -243,6 +243,8 @@ getSupportedVulkanExternalMemoryHandleTypeList( VkPhysicalDeviceExternalBufferInfo buffer_info = {}; buffer_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO; buffer_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT | 
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; VkExternalBufferProperties buffer_properties = {}; buffer_properties.sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES; @@ -307,7 +309,9 @@ getSupportedVulkanExternalSemaphoreHandleTypeList(const VulkanDevice &vkDevice) VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, nullptr, handle_type.vk_type }; - VkExternalSemaphoreProperties query_result = {}; + VkExternalSemaphoreProperties query_result = { + VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES + }; vkGetPhysicalDeviceExternalSemaphorePropertiesKHR( vkDevice.getPhysicalDevice(), &handle_query, &query_result); if (query_result.externalSemaphoreFeatures diff --git a/test_conformance/compiler/CMakeLists.txt b/test_conformance/compiler/CMakeLists.txt index b64d3b31..498c6218 100644 --- a/test_conformance/compiler/CMakeLists.txt +++ b/test_conformance/compiler/CMakeLists.txt @@ -17,10 +17,31 @@ set(${MODULE_NAME}_SOURCES include(../CMakeCommon.txt) +# Include the relative paths to SPV assembly files +configure_file(spirv_asm_list.txt ${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt) +include(${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt) + +# Determine the corresponding binary outputs to the SPV assembly input files +set(COMPILER_ASM_REL_PATH spirv_asm) +set(COMPILER_ASM_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${COMPILER_ASM_REL_PATH}") +set(COMPILER_SPV_PATH "${CMAKE_CURRENT_BINARY_DIR}/spirv_bin") + # Copy the required test include directories into the build directory. 
if(NOT DEFINED COMPILER_TEST_RESOURCES) set(COMPILER_TEST_RESOURCES $) endif() + +set(COMPILER_SPV_EXTRA "") +if(SPIRV_TOOLS_DIR AND IS_ABSOLUTE "${SPIRV_TOOLS_DIR}" AND + IS_DIRECTORY "${SPIRV_TOOLS_DIR}") + message("Using SPIR-V tools from '${SPIRV_TOOLS_DIR}'") + set(COMPILER_SPV_EXTRA "--assembler=${SPIRV_TOOLS_DIR}/spirv-as" "--validator=${SPIRV_TOOLS_DIR}/spirv-val") +endif() +set(COMPILER_ASSEMBLY_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../spirv_new/spirv_asm/assemble_spirv.py) + +include(CMakePrintHelpers) +cmake_print_variables(COMPILER_ASSEMBLY_SCRIPT) + add_custom_command( COMMENT "Copying compiler test resources..." TARGET ${${MODULE_NAME}_OUT} @@ -30,7 +51,10 @@ add_custom_command( ${COMPILER_TEST_RESOURCES}/includeTestDirectory COMMAND ${CMAKE_COMMAND} -E copy_directory ${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory - ${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory) + ${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory + COMMAND ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose + DEPENDS ${COMPILER_ASSEMBLY_SCRIPT} ${COMPILER_ASM} + VERBATIM) include(GNUInstallDirs) diff --git a/test_conformance/compiler/write_kernel.spvasm32 b/test_conformance/compiler/spirv_asm/write_kernel.spvasm32 similarity index 100% rename from test_conformance/compiler/write_kernel.spvasm32 rename to test_conformance/compiler/spirv_asm/write_kernel.spvasm32 diff --git a/test_conformance/compiler/write_kernel.spvasm64 b/test_conformance/compiler/spirv_asm/write_kernel.spvasm64 similarity index 100% rename from test_conformance/compiler/write_kernel.spvasm64 rename to test_conformance/compiler/spirv_asm/write_kernel.spvasm64 diff --git a/test_conformance/compiler/spirv_asm_list.txt b/test_conformance/compiler/spirv_asm_list.txt new file mode 100644 index 00000000..8150ce28 --- /dev/null +++ b/test_conformance/compiler/spirv_asm_list.txt @@ -0,0 +1,4 @@ 
+set(COMPILER_SPIRV_NEW_ASM +compiler_spirv_asm/write_kernel.spvasm32 +compiler_spirv_asm/write_kernel.spvasm64 +) diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp index 907de9cb..70ca9449 100644 --- a/test_conformance/compiler/test_compile.cpp +++ b/test_conformance/compiler/test_compile.cpp @@ -14,6 +14,9 @@ // limitations under the License. // #include "testBase.h" + +#include + #if defined(_WIN32) #include #elif defined(__linux__) || defined(__APPLE__) @@ -3020,15 +3023,6 @@ REGISTER_TEST(execute_after_embedded_header_link) return 0; } -#if defined(__APPLE__) || defined(__linux) -#define _mkdir(x) mkdir(x, S_IRWXU) -#define _chdir chdir -#define _rmdir rmdir -#define _unlink unlink -#else -#include -#endif - REGISTER_TEST(execute_after_included_header_link) { int error; @@ -3047,100 +3041,60 @@ REGISTER_TEST(execute_after_included_header_link) } /* setup */ -#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__)) - /* Some tests systems doesn't allow one to write in the test directory */ - if (_chdir("/tmp") != 0) + std::error_code ec; + auto temp_dir_path = std::filesystem::temp_directory_path(ec); + if (ec) { - log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", - __FILE__, __LINE__); + log_error("ERROR: Unable to get the temporary directory path\n"); return -1; } -#endif - if (_mkdir("foo") != 0) + temp_dir_path = temp_dir_path / "foo" / "bar"; + std::filesystem::create_directories(temp_dir_path, ec); + if (ec) { - log_error("ERROR: Unable to create directory foo! (in %s:%d)\n", - __FILE__, __LINE__); + log_error("ERROR: Unable to create directory: %s, error: %d (%s)\n", + temp_dir_path.u8string().c_str(), ec.value(), + ec.message().c_str()); return -1; } - if (_mkdir("foo/bar") != 0) - { - log_error("ERROR: Unable to create directory foo/bar! 
(in %s:%d)\n", - __FILE__, __LINE__); - return -1; - } - if (_chdir("foo/bar") != 0) - { - log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", - __FILE__, __LINE__); - return -1; - } - FILE *simple_header_file = fopen(simple_header_name, "w"); + + const auto simple_header_path = temp_dir_path / simple_header_name; + FILE *simple_header_file = + fopen(simple_header_path.u8string().c_str(), "w"); if (simple_header_file == NULL) { log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n", - simple_header_name, __FILE__, __LINE__); + simple_header_path.u8string().c_str(), __FILE__, __LINE__); return -1; } if (fprintf(simple_header_file, "%s", simple_header) < 0) { log_error( "ERROR: Unable to write to simple header file %s! (in %s:%d)\n", - simple_header_name, __FILE__, __LINE__); + simple_header_path.u8string().c_str(), __FILE__, __LINE__); return -1; } if (fclose(simple_header_file) != 0) { log_error("ERROR: Unable to close simple header file %s! (in %s:%d)\n", - simple_header_name, __FILE__, __LINE__); + simple_header_path.u8string().c_str(), __FILE__, __LINE__); return -1; } - if (_chdir("../..") != 0) - { - log_error("ERROR: Unable to change to original working directory! (in " - "%s:%d)\n", - __FILE__, __LINE__); - return -1; - } -#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__)) - error = clCompileProgram(program, 1, &device, "-I/tmp/foo/bar", 0, NULL, + + const std::string include_path = + std::string("-I") + temp_dir_path.generic_u8string(); + error = clCompileProgram(program, 1, &device, include_path.c_str(), 0, NULL, NULL, NULL, NULL); -#else - error = clCompileProgram(program, 1, &device, "-Ifoo/bar", 0, NULL, NULL, - NULL, NULL); -#endif test_error(error, "Unable to compile a simple program with included header"); /* cleanup */ - if (_chdir("foo/bar") != 0) + std::filesystem::remove_all(temp_dir_path, ec); + if (ec) { - log_error("ERROR: Unable to change to directory foo/bar! 
(in %s:%d)\n", - __FILE__, __LINE__); - return -1; - } - if (_unlink(simple_header_name) != 0) - { - log_error("ERROR: Unable to remove simple header file %s! (in %s:%d)\n", - simple_header_name, __FILE__, __LINE__); - return -1; - } - if (_chdir("../..") != 0) - { - log_error("ERROR: Unable to change to original working directory! (in " - "%s:%d)\n", - __FILE__, __LINE__); - return -1; - } - if (_rmdir("foo/bar") != 0) - { - log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", - __FILE__, __LINE__); - return -1; - } - if (_rmdir("foo") != 0) - { - log_error("ERROR: Unable to remove directory foo! (in %s:%d)\n", - __FILE__, __LINE__); + log_error("ERROR: Unable to delete directory: %s, error: %d (%s)", + temp_dir_path.u8string().c_str(), ec.value(), + ec.message().c_str()); return -1; } diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index baede608..c3f3993d 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -95,7 +95,9 @@ const char *known_extensions[] = { "cl_khr_command_buffer", "cl_khr_command_buffer_mutable_dispatch", "cl_khr_command_buffer_multi_device", - "cl_khr_external_memory_android_hardware_buffer" + "cl_khr_external_memory_android_hardware_buffer", + "cl_khr_unified_svm", + "cl_khr_spirv_queries" }; // clang-format on diff --git a/test_conformance/compiler/test_feature_macro.cpp b/test_conformance/compiler/test_feature_macro.cpp index 93776a8b..f7a53628 100644 --- a/test_conformance/compiler/test_feature_macro.cpp +++ b/test_conformance/compiler/test_feature_macro.cpp @@ -656,6 +656,32 @@ static int test_feature_macro_integer_dot_product_input_4x8bit( compiler_status, supported); } +static int test_feature_macro_ext_image_unorm_int_2_101010( + cl_device_id deviceID, cl_context context, std::string test_macro_name, + cl_bool& 
supported) +{ + cl_int error = TEST_FAIL; + cl_bool api_status = CL_TRUE; + cl_bool compiler_status; + log_info("\n%s ...\n", test_macro_name.c_str()); + + if (!is_extension_available(deviceID, "cl_ext_image_unorm_int_2_101010")) + { + supported = false; + return TEST_PASS; + } + + error = check_compiler_feature_info(deviceID, context, test_macro_name, + compiler_status); + if (error != CL_SUCCESS) + { + return error; + } + + return feature_macro_verify_results(test_macro_name, api_status, + compiler_status, supported); +} + static int test_feature_macro_int64(cl_device_id deviceID, cl_context context, std::string test_macro_name, cl_bool& supported) @@ -833,6 +859,7 @@ REGISTER_TEST_VERSION(features_macro, Version(3, 0)) NEW_FEATURE_MACRO_TEST(int64); NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit); NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit_packed); + NEW_FEATURE_MACRO_TEST(ext_image_unorm_int_2_101010); error |= test_consistency_c_features_list(device, supported_features_vec); diff --git a/test_conformance/compiler/test_unload_platform_compiler.cpp b/test_conformance/compiler/test_unload_platform_compiler.cpp index 84442080..bb41f64d 100644 --- a/test_conformance/compiler/test_unload_platform_compiler.cpp +++ b/test_conformance/compiler/test_unload_platform_compiler.cpp @@ -25,6 +25,43 @@ #include #include #include +#include + +#if defined(_WIN32) +const std::string slash = "\\"; +#else +const std::string slash = "/"; +#endif +std::string compilerSpvBinaries = "test_conformance" + slash + "compiler" + + slash + "spirv_bin" + slash + "write_kernel.spv"; + +const std::string spvExt = ".spv"; + +std::vector readBinary(const char *file_name) +{ + using namespace std; + + ifstream file(file_name, ios::in | ios::binary | ios::ate); + + std::vector tmpBuffer(0); + + if (file.is_open()) + { + size_t size = file.tellg(); + tmpBuffer.resize(size); + file.seekg(0, ios::beg); + file.read(&tmpBuffer[0], size); + file.close(); + } + else + { + 
log_error("File %s not found\n", file_name); + } + + std::vector result(tmpBuffer.begin(), tmpBuffer.end()); + + return result; +} namespace { @@ -299,18 +336,12 @@ public: throw unload_test_failure("Failure getting device address bits"); } - switch (address_bits) - { - case 32: - m_spirv_binary = write_kernel_32_spv.data(); - m_spirv_size = write_kernel_32_spv.size(); - break; - case 64: - m_spirv_binary = write_kernel_64_spv.data(); - m_spirv_size = write_kernel_64_spv.size(); - break; - default: throw unload_test_failure("Invalid address bits"); - } + std::vector kernel_buffer; + + std::string file_name = + compilerSpvBinaries + std::to_string(address_bits); + m_spirv_binary = readBinary(file_name.c_str()); + m_spirv_size = m_spirv_binary.size(); } void create() final @@ -320,7 +351,7 @@ public: assert(nullptr == m_program); cl_int err = CL_INVALID_PLATFORM; - m_program = m_CreateProgramWithIL(m_context, m_spirv_binary, + m_program = m_CreateProgramWithIL(m_context, &m_spirv_binary[0], m_spirv_size, &err); if (CL_SUCCESS != err) throw unload_test_failure("clCreateProgramWithIL()", err); @@ -347,7 +378,7 @@ public: } private: - void *m_spirv_binary; + std::vector m_spirv_binary; size_t m_spirv_size; bool m_enabled; diff --git a/test_conformance/compiler/test_unload_platform_compiler_resources.hpp b/test_conformance/compiler/test_unload_platform_compiler_resources.hpp index 82f87ffc..a529c212 100644 --- a/test_conformance/compiler/test_unload_platform_compiler_resources.hpp +++ b/test_conformance/compiler/test_unload_platform_compiler_resources.hpp @@ -4,47 +4,3 @@ static const char write_kernel_source[] = R"( kernel void write_kernel(global unsigned int *p) { *p = 42; })"; - -/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm64 */ -static std::array write_kernel_64_spv{ - { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 
0x02, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, - 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 } -}; - -/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm32 */ -static std::array write_kernel_32_spv{ - { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, - 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, - 0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, - 0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, - 
0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00, - 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 } -}; diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp index 2fdf0323..0c868764 100644 --- a/test_conformance/contractions/contractions.cpp +++ b/test_conformance/contractions/contractions.cpp @@ -284,6 +284,11 @@ int main( int argc, const char **argv ) static int ParseArgs( int argc, const char **argv ) { + if (gListTests) + { + return 0; + } + gArgList = (const char **)calloc( argc, sizeof( char*) ); if( NULL == gArgList ) diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt index 32990eba..e2e97667 100644 --- a/test_conformance/conversions/CMakeLists.txt +++ b/test_conformance/conversions/CMakeLists.txt @@ -1,9 +1,5 @@ set(MODULE_NAME CONVERSIONS) -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - add_cxx_flag_if_supported(-Wno-narrowing) -endif() - set (${MODULE_NAME}_SOURCES Sleep.cpp test_conversions.cpp basic_test_conversions.cpp ) diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index 59d41e55..79333275 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -13,9 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// +#include "harness/mathHelpers.h" #include "harness/testHarness.h" #include "harness/compat.h" #include "harness/ThreadPool.h" +#include "harness/parseParameters.h" #if defined(__APPLE__) #include @@ -52,17 +54,17 @@ #include "basic_test_conversions.h" -#if defined(_WIN32) +#if defined(_M_IX86) || defined(_M_X64) #include #include -#else // !_WIN32 +#else #if defined(__SSE__) #include #endif #if defined(__SSE2__) #include #endif -#endif // _WIN32 +#endif cl_context gContext = NULL; cl_command_queue gQueue = NULL; @@ -76,7 +78,6 @@ cl_mem gInBuffer; cl_mem gOutBuffers[kCallStyleCount]; size_t gComputeDevices = 0; uint32_t gDeviceFrequency = 0; -int gWimpyMode = 0; int gWimpyReductionFactor = 128; int gSkipTesting = 0; int gForceFTZ = 0; @@ -955,24 +956,6 @@ void MapResultValuesComplete(const std::unique_ptr &info) // destroyed automatically soon after we exit. } -template static bool isnan_fp(const T &v) -{ - if (std::is_same::value) - { - uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; - uint16_t h_mant = ((cl_half)v) & 0x3FF; - return (h_exp == 0x1F && h_mant != 0); - } - else - { -#if !defined(_WIN32) - return std::isnan(v); -#else - return _isnan(v); -#endif - } -} - template void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType, void *input) { diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index 56232374..6846f780 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -80,7 +80,6 @@ extern int gHasDouble; extern int gTestDouble; extern int gHasHalfs; extern int gTestHalfs; -extern int gWimpyMode; extern int gWimpyReductionFactor; extern int gSkipTesting; extern int gMinVectorSize; diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h index 9d2cbc60..46eb9c23 100644 --- 
a/test_conformance/conversions/conversions_data_info.h +++ b/test_conformance/conversions/conversions_data_info.h @@ -343,7 +343,7 @@ float DataInfoSpec::round_to_int(float f) volatile float x = f; float magicVal = magic[f < 0]; -#if defined(__SSE__) +#if defined(__SSE__) || _M_IX86_FP == 1 // Defeat x87 based arithmetic, which cant do FTZ, and will round this // incorrectly __m128 v = _mm_set_ss(x); @@ -376,7 +376,7 @@ DataInfoSpec::round_to_int_and_clamp(double f) { volatile double x = f; double magicVal = magic[f < 0]; -#if defined(__SSE2__) || defined(_MSC_VER) +#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64) // Defeat x87 based arithmetic, which cant do FTZ, and will round this // incorrectly __m128d v = _mm_set_sd(x); @@ -479,7 +479,7 @@ void DataInfoSpec::conv(OutType *out, InType *in) { if (std::is_same::value) { -#if defined(_MSC_VER) +#if defined(_M_IX86) || defined(_M_X64) double result; if (std::is_same::value) diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 1712e099..7a143a6b 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -112,6 +112,35 @@ int main(int argc, const char **argv) int error; argc = parseCustomParam(argc, argv); + if (gListTests) + { + for (unsigned dst = 0; dst < kTypeCount; dst++) + { + for (unsigned src = 0; src < kTypeCount; src++) + { + for (unsigned sat = 0; sat < 2; sat++) + { + // skip illegal saturated conversions to float type + if (gSaturationNames[sat] == std::string("_sat") + && (gTypeNames[dst] == std::string("float") + || gTypeNames[dst] == std::string("half") + || gTypeNames[dst] == std::string("double"))) + { + continue; + } + for (unsigned rnd = 0; rnd < kRoundingModeCount; rnd++) + { + vlog("\t%s\n", + (std::string(gTypeNames[dst]) + + gSaturationNames[sat] + gRoundingModeNames[rnd] + + "_" + gTypeNames[src]) + .c_str()); + } + } + } + } + return 0; + } if (argc == 
-1) { return 1; @@ -218,7 +247,6 @@ static int ParseArgs(int argc, const char **argv) case 'h': gTestHalfs ^= 1; break; case 'l': gSkipTesting ^= 1; break; case 'm': gMultithread ^= 1; break; - case 'w': gWimpyMode ^= 1; break; case '[': parseWimpyReductionFactor(arg, gWimpyReductionFactor); break; @@ -287,14 +315,6 @@ static int ParseArgs(int argc, const char **argv) } } - // Check for the wimpy mode environment variable - if (getenv("CL_WIMPY_MODE")) - { - vlog("\n"); - vlog("*** Detected CL_WIMPY_MODE env ***\n"); - gWimpyMode = 1; - } - vlog("\n"); PrintArch(); @@ -335,9 +355,6 @@ static void PrintUsage(void) vlog("\t\t-l\tToggle link check mode. When on, testing is skipped, and we " "just check to see that the kernels build. (Off by default.)\n"); vlog("\t\t-m\tToggle Multithreading. (On by default.)\n"); - vlog("\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very " - "small subset of the tests for each fn. NOT A VALID TEST! (Off by " - "default.)\n"); vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is " "1-12, default factor(%u)\n", gWimpyReductionFactor); diff --git a/test_conformance/d3d10/CMakeLists.txt b/test_conformance/d3d10/CMakeLists.txt index 385ea86f..46387f51 100644 --- a/test_conformance/d3d10/CMakeLists.txt +++ b/test_conformance/d3d10/CMakeLists.txt @@ -1,22 +1,4 @@ if(WIN32) - -set(D3D10_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include) - -if(${ARCH} STREQUAL "i686") -set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86) -endif(${ARCH} STREQUAL "i686") - -if(${ARCH} STREQUAL "x86_64") -set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64) -endif(${ARCH} STREQUAL "x86_64") - -list(APPEND CLConform_INCLUDE_DIR ${D3D10_INCLUDE_DIR}) -include_directories (${CLConform_SOURCE_DIR}/test_common/harness - ${CLConform_INCLUDE_DIR} ) -link_directories(${CL_LIB_DIR}, ${D3D10_LIB_DIR}) - -list(APPEND CLConform_LIBRARIES d3d10 dxgi) - set(MODULE_NAME D3D10) set(${MODULE_NAME}_SOURCES @@ -28,10 +10,9 @@ 
set(${MODULE_NAME}_SOURCES harness.cpp ) -set_source_files_properties( - ${MODULE_NAME}_SOURCES - PROPERTIES LANGUAGE CXX) +list(APPEND CLConform_LIBRARIES d3d10 dxgi) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include(../CMakeCommon.txt) -endif(WIN32) +else() +message(STATUS "D3D10 tests are only supported on Windows.") +endif() diff --git a/test_conformance/d3d11/CMakeLists.txt b/test_conformance/d3d11/CMakeLists.txt index 14a378d7..b9b81b56 100644 --- a/test_conformance/d3d11/CMakeLists.txt +++ b/test_conformance/d3d11/CMakeLists.txt @@ -1,22 +1,4 @@ if(WIN32) - -set(D3D11_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include) - -if(${ARCH} STREQUAL "i686") -set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86) -endif(${ARCH} STREQUAL "i686") - -if(${ARCH} STREQUAL "x86_64") -set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64) -endif(${ARCH} STREQUAL "x86_64") - -list(APPEND CLConform_INCLUDE_DIR ${D3D11_INCLUDE_DIR}) -include_directories (${CLConform_SOURCE_DIR}/test_common/harness - ${CLConform_INCLUDE_DIR} ) -link_directories(${CL_LIB_DIR}, ${D3D11_LIB_DIR}) - -list(APPEND CLConform_LIBRARIES d3d11 dxgi) - set(MODULE_NAME D3D11) set(${MODULE_NAME}_SOURCES @@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES harness.cpp ) -set_source_files_properties( - ${MODULE_NAME}_SOURCES - PROPERTIES LANGUAGE CXX) +list(APPEND CLConform_LIBRARIES d3d11 dxgi) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include(../CMakeCommon.txt) -endif(WIN32) +else() +message(STATUS "D3D11 tests are only supported on Windows.") +endif() diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp index c3761d08..10d7b86d 100644 --- a/test_conformance/device_execution/enqueue_block.cpp +++ b/test_conformance/device_execution/enqueue_block.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -25,7 +26,6 @@ #ifdef 
CL_VERSION_2_0 -extern int gWimpyMode; // clang-format off static const char* enqueue_simple_block[] = { R"( diff --git a/test_conformance/device_execution/enqueue_flags.cpp b/test_conformance/device_execution/enqueue_flags.cpp index d880fadd..35610174 100644 --- a/test_conformance/device_execution/enqueue_flags.cpp +++ b/test_conformance/device_execution/enqueue_flags.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -25,7 +26,6 @@ #ifdef CL_VERSION_2_0 -extern int gWimpyMode; #define BITS_DEPTH 28 static const char* enqueue_flags_wait_kernel_simple[] = diff --git a/test_conformance/device_execution/enqueue_multi_queue.cpp b/test_conformance/device_execution/enqueue_multi_queue.cpp index 90dbf2fd..5caaefa7 100644 --- a/test_conformance/device_execution/enqueue_multi_queue.cpp +++ b/test_conformance/device_execution/enqueue_multi_queue.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -24,10 +25,7 @@ #include - - #ifdef CL_VERSION_2_0 -extern int gWimpyMode; static const char enqueue_block_multi_queue[] = NL "#define BLOCK_COMPLETED 0" NL "#define BLOCK_SUBMITTED 1" diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp index ffadc0ac..8f71ac4e 100644 --- a/test_conformance/device_execution/enqueue_ndrange.cpp +++ b/test_conformance/device_execution/enqueue_ndrange.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include #include @@ -26,7 +27,6 @@ #ifdef CL_VERSION_2_0 -extern int gWimpyMode; static const char *helper_ndrange_1d_glo[] = { NL, "void block_fn(int len, __global atomic_uint* val)" NL, diff --git a/test_conformance/device_execution/enqueue_wg_size.cpp b/test_conformance/device_execution/enqueue_wg_size.cpp 
index f662edb9..5f40951e 100644 --- a/test_conformance/device_execution/enqueue_wg_size.cpp +++ b/test_conformance/device_execution/enqueue_wg_size.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -25,7 +26,6 @@ #ifdef CL_VERSION_2_0 -extern int gWimpyMode; static int nestingLevel = 3; static const char* enqueue_1D_wg_size_single[] = diff --git a/test_conformance/device_execution/host_multi_queue.cpp b/test_conformance/device_execution/host_multi_queue.cpp index cca83454..13ab87e6 100644 --- a/test_conformance/device_execution/host_multi_queue.cpp +++ b/test_conformance/device_execution/host_multi_queue.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -25,7 +26,6 @@ #ifdef CL_VERSION_2_0 -extern int gWimpyMode; static const char* multi_queue_simple_block1[] = { NL, "void block_fn(size_t tid, int mul, __global int* res)" diff --git a/test_conformance/device_execution/host_queue_order.cpp b/test_conformance/device_execution/host_queue_order.cpp index bafbce08..d235780e 100644 --- a/test_conformance/device_execution/host_queue_order.cpp +++ b/test_conformance/device_execution/host_queue_order.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include #include @@ -24,8 +25,6 @@ #include "utils.h" #include -extern int gWimpyMode; - #ifdef CL_VERSION_2_0 static const char* enqueue_block_first_kernel[] = diff --git a/test_conformance/device_execution/main.cpp b/test_conformance/device_execution/main.cpp index efb311f9..81c19802 100644 --- a/test_conformance/device_execution/main.cpp +++ b/test_conformance/device_execution/main.cpp @@ -25,7 +25,6 @@ #include "utils.h" std::string gKernelName; -int gWimpyMode = 0; test_status InitCL(cl_device_id device) { auto version = 
get_device_cl_version(device); @@ -71,11 +70,6 @@ int main(int argc, const char *argv[]) gKernelName = std::string(argv[i + 1]); argsRemoveNum += 2; } - if (strcmp(argv[i], "-w") == 0 ){ - gWimpyMode = 1; - argsRemoveNum += 1; - } - if (argsRemoveNum > 0) { for (int j = i; j < (argc - argsRemoveNum); ++j) diff --git a/test_conformance/device_execution/nested_blocks.cpp b/test_conformance/device_execution/nested_blocks.cpp index a191bdf5..9fc2d741 100644 --- a/test_conformance/device_execution/nested_blocks.cpp +++ b/test_conformance/device_execution/nested_blocks.cpp @@ -17,6 +17,7 @@ #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include "harness/parseParameters.h" #include @@ -27,7 +28,6 @@ #ifdef CL_VERSION_2_0 static int gNestingLevel = 4; -extern int gWimpyMode; static const char* enqueue_nested_blocks_single[] = { diff --git a/test_conformance/extensions/CMakeLists.txt b/test_conformance/extensions/CMakeLists.txt index aa57990b..2fee828a 100644 --- a/test_conformance/extensions/CMakeLists.txt +++ b/test_conformance/extensions/CMakeLists.txt @@ -5,6 +5,9 @@ add_subdirectory( cl_ext_cxx_for_opencl ) add_subdirectory( cl_khr_command_buffer ) add_subdirectory( cl_khr_dx9_media_sharing ) +if(ANDROID_PLATFORM GREATER 28) + add_subdirectory( cl_khr_external_memory_ahb ) +endif () add_subdirectory( cl_khr_external_memory_dma_buf ) add_subdirectory( cl_khr_semaphore ) add_subdirectory( cl_khr_kernel_clock ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp index 43926b84..9c3a402b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp @@ -435,3 +435,40 @@ bool InterleavedEnqueueTest::Skip() { return BasicCommandBufferTest::Skip() || !simultaneous_use_support; } + +cl_int EnqueueAndReleaseTest::Run() +{ + cl_int 
error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + cl_int pattern = 42; + error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, nullptr, + nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Calls release on cl_command_buffer_khr handle inside wrapper class, and + // sets the handle to nullptr, so that release doesn't get called again at + // end of test when wrapper object is destroyed. + command_buffer.reset(); + + std::vector<cl_int> output_data(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + } + + return CL_SUCCESS; +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h index 7ad7d28d..241a08c5 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h @@ -128,6 +128,15 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest bool Skip() override; }; +// Test releasing a command-buffer after it has been submitted for execution, +// but before the user has waited on completion of the enqueue. 
+struct EnqueueAndReleaseTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override; +}; + template <class T> int MakeAndRunTest(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer_tests.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer_tests.cpp index 0f95372a..69d554c4 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer_tests.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer_tests.cpp @@ -44,3 +44,9 @@ REGISTER_TEST(explicit_flush) return MakeAndRunTest<ExplicitFlushTest>(device, context, queue, num_elements); } + +REGISTER_TEST(enqueue_and_release) +{ + return MakeAndRunTest<EnqueueAndReleaseTest>(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index 8fa84162..aed183ff 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -14,6 +14,8 @@ set(${MODULE_NAME}_SOURCES mutable_command_multiple_dispatches.cpp mutable_command_iterative_arg_update.cpp mutable_command_work_groups.cpp + mutable_command_work_dim.cpp + mutable_command_update_state.cpp ../basic_command_buffer.cpp ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp index 8fb6b643..3cfc4db1 100644 --- 
a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp @@ -135,7 +135,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest for (size_t i = 0; i < num_elements; i++) if (i >= update_global_size && global_work_size != resultData[i]) { - log_error("Data failed to verify: update_global_size != " + log_error("Data failed to verify: global_work_size != " "resultData[%zu]=%d\n", i, resultData[i]); return TEST_FAIL; @@ -154,7 +154,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest size_t info_global_size = 0; const size_t update_global_size = 3; - const size_t sizeToAllocate = global_work_size; + const size_t sizeToAllocate = global_work_size * sizeof(cl_int); const size_t num_elements = sizeToAllocate / sizeof(cl_int); cl_mutable_command_khr command = nullptr; }; diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp index f75457e6..1657a6fa 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp @@ -116,26 +116,6 @@ struct PropertiesArray : public InfoMutableCommandBufferTest : InfoMutableCommandBufferTest(device, context, queue) {} - virtual bool Skip() override - { - Version device_version = get_device_cl_version(device); - if ((device_version >= Version(3, 0)) - || is_extension_available(device, "cl_khr_extended_versioning")) - { - - cl_version extension_version = get_extension_version( - device, "cl_khr_command_buffer_mutable_dispatch"); - - if 
(extension_version != CL_MAKE_VERSION(0, 9, 3)) - { - log_info("cl_khr_command_buffer_mutable_dispatch version 0.9.3 " - "is required to run the test, skipping.\n "); - return true; - } - } - return InfoMutableCommandBufferTest::Skip(); - } - cl_int Run() override { cl_command_properties_khr props[] = { diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp index b4945e77..4b1610f5 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_simultaneous.cpp @@ -297,6 +297,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest { cl_int offset; std::vector output_buffer; + std::vector updated_output_buffer; // 0:user event, 1:offset-buffer fill event, 2:kernel done event clEventWrapper wait_events[3]; }; @@ -337,6 +338,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest * buffer_size_multiplier, nullptr, &error); test_error(error, "clCreateBuffer failed"); + // Retain new output memory object until the end of the test. 
+ retained_output_buffers.push_back(new_out_mem); cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), &new_out_mem }; @@ -373,7 +376,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE, pd.offset * sizeof(cl_int), data_size(), - pd.output_buffer.data(), 1, + pd.updated_output_buffer.data(), 1, &pd.wait_events[2], nullptr); test_error(error, "clEnqueueReadBuffer failed"); @@ -388,8 +391,10 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest cl_int offset = static_cast(num_elements); std::vector simul_passes = { - { 0, std::vector(num_elements) }, - { offset, std::vector(num_elements) } + { 0, std::vector(num_elements), + std::vector(num_elements) }, + { offset, std::vector(num_elements), + std::vector(num_elements) } }; for (auto&& pass : simul_passes) @@ -407,13 +412,26 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest test_error(error, "clFinish failed"); // verify the result buffers - for (auto&& pass : simul_passes) + auto& first_pass_output = simul_passes[0].output_buffer; + auto& first_pass_updated_output = simul_passes[0].updated_output_buffer; + auto& second_pass_output = simul_passes[1].output_buffer; + auto& second_pass_updated_output = + simul_passes[1].updated_output_buffer; + for (size_t i = 0; i < num_elements; i++) { - auto& res_data = pass.output_buffer; - for (size_t i = 0; i < num_elements; i++) - { - CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i); - } + // First pass: + // Before updating, out_mem is copied from in_mem (pattern_pri) + CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_output[i], i); + // After updating, new_out_mem is copied from in_mem (pattern_pri) + CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_updated_output[i], + i); + // Second pass: + // Before updating, out_mem is filled with overwritten_pattern + CHECK_VERIFICATION_ERROR(overwritten_pattern, 
second_pass_output[i], + i); + // After updating, new_out_mem is copied from in_mem (pattern_pri) + CHECK_VERIFICATION_ERROR(pattern_pri, second_pass_updated_output[i], + i); } return CL_SUCCESS; @@ -429,6 +447,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest clKernelWrapper kernel_fill; clProgramWrapper program_fill; + std::vector retained_output_buffers; + const size_t test_global_work_size = 3 * sizeof(cl_int); const cl_int pattern_pri = 42; diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_update_state.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_update_state.cpp new file mode 100644 index 00000000..b3c421b2 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_update_state.cpp @@ -0,0 +1,280 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "testHarness.h" +#include "mutable_command_basic.h" + +#include + +#include + +namespace { + +//////////////////////////////////////////////////////////////////////////////// +// Tests related to ensuring the state of the updated command-buffer is expected +// and the effects of operations on it don't have side effects on other objects. 
+// +// - Tests the updates applied to a command-buffer persist over all subsequent +// enqueues. +// - Tests interaction of `clSetKernelArg` with mutable-dispatch extension. + +struct MutableDispatchUpdateStateTest : public BasicMutableCommandBufferTest +{ + MutableDispatchUpdateStateTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue), + buffer(nullptr), command(nullptr) + {} + + bool Skip() override + { + if (BasicMutableCommandBufferTest::Skip()) return true; + + cl_mutable_dispatch_fields_khr mutable_capabilities; + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + return !mutable_support; + } + + cl_int SetUpKernelArgs() override + { + cl_int error = CL_SUCCESS; + buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), nullptr, &error); + test_error(error, "clCreateBuffer error"); + + // Zero initialize buffer + const cl_int zero_pattern = 0; + error = clEnqueueFillBuffer( + queue, buffer, &zero_pattern, sizeof(cl_int), 0, + num_elements * sizeof(cl_int), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer); + test_error(error, "Unable to set kernel argument 0"); + + return CL_SUCCESS; + } + + cl_int SetUpKernel() override + { + const char *add_kernel = + R"( + __kernel void add_kernel(__global int *data, int value) + { + size_t tid = get_global_id(0); + data[tid] += value; + })"; + + cl_int error = create_single_kernel_helper( + context, &program, &kernel, 1, &add_kernel, "add_kernel"); + test_error(error, "Creating kernel failed"); + return CL_SUCCESS; + } + + bool verify_result(cl_int ref) + { + std::vector<cl_int> data(num_elements); + 
cl_int error = + clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(), + data.data(), 0, nullptr, nullptr); + test_error_ret(error, "clEnqueueReadBuffer failed", false); + + for (size_t i = 0; i < num_elements; i++) + { + if (data[i] != ref) + { + log_error("Modified verification failed at index %zu: Got %d, " + "wanted %d\n", + i, data[i], ref); + return false; + } + } + return true; + } + + clMemWrapper buffer; + cl_mutable_command_khr command; +}; + +struct MutableDispatchUpdatesPersistTest : public MutableDispatchUpdateStateTest +{ + MutableDispatchUpdatesPersistTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : MutableDispatchUpdateStateTest(device, context, queue) + {} + + cl_int Run() override + { + const cl_int original_val = 42; + cl_int error = + clSetKernelArg(kernel, 1, sizeof(original_val), &original_val); + test_error(error, "Unable to set kernel argument 1"); + + cl_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Modify the command buffer before executing + const cl_int new_command_val = 5; + cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val), + &new_command_val }; + cl_mutable_dispatch_config_khr dispatch_config{ + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_uint num_configs = 1; + cl_command_buffer_update_type_khr 
config_types[1] = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR + }; + const void *configs[1] = { &dispatch_config }; + error = clUpdateMutableCommandsKHR(command_buffer, num_configs, + config_types, configs); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + const unsigned iterations = 5; + for (unsigned i = 0; i < iterations; i++) + { + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + } + + // Every enqueue must apply the updated command-buffer value, so the + // buffer ends up holding iterations * new_command_val; the original + // command value must never take part in the operation. + constexpr cl_int ref = iterations * new_command_val; + return verify_result(ref) ? TEST_PASS : TEST_FAIL; + } +}; + +struct MutableDispatchSetKernelArgTest : public MutableDispatchUpdateStateTest +{ + MutableDispatchSetKernelArgTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : MutableDispatchUpdateStateTest(device, context, queue) + {} + + cl_int Run() override + { + const cl_int original_val = 42; + cl_int error = + clSetKernelArg(kernel, 1, sizeof(original_val), &original_val); + test_error(error, "Unable to set kernel argument 1"); + + cl_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Set new kernel argument for later clEnqueueNDRangeKernel + const cl_int new_eager_val = 10; + error = + clSetKernelArg(kernel, 1, sizeof(new_eager_val), &new_eager_val); + test_error(error, "Unable to set kernel 
argument 1"); + + // Modify the command buffer before executing + const cl_int new_command_val = 5; + cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val), + &new_command_val }; + cl_mutable_dispatch_config_khr dispatch_config{ + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_uint num_configs = 1; + cl_command_buffer_update_type_khr config_types[1] = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR + }; + const void *configs[1] = { &dispatch_config }; + error = clUpdateMutableCommandsKHR(command_buffer, num_configs, + config_types, configs); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + // Eager kernel enqueue, followed by command-buffer enqueue + error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results execution sequence is the clEnqueueNDRangeKernel + // value + the updated command-buffer value, not using the original + // command value in the operation. + constexpr cl_int ref = new_eager_val + new_command_val; + return verify_result(ref) ? 
TEST_PASS : TEST_FAIL; + } +}; +} + +REGISTER_TEST(mutable_dispatch_updates_persist) +{ + return MakeAndRunTest<MutableDispatchUpdatesPersistTest>( + device, context, queue, num_elements); +} + +REGISTER_TEST(mutable_dispatch_set_kernel_arg) +{ + return MakeAndRunTest<MutableDispatchSetKernelArgTest>(device, context, + queue, num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp new file mode 100644 index 00000000..b317884b --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_work_dim.cpp @@ -0,0 +1,225 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include "mutable_command_basic.h" + +#include +#include + +#include + +// Tests that a mutable dispatch update setting `work_dim` to the original 3D +// value behaves as expected. 
+ +struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchWorkDim(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + cl_int SetUp(int elements) override + { + result_data.resize(update_total_elements); + return InfoMutableCommandBufferTest::SetUp(elements); + } + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && (mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR); + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + bool Verify(cl_mem buffer, cl_uint expected_value, size_t total_elements) + { + std::memset(result_data.data(), 0, alloc_size); + cl_int error = + clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, alloc_size, + result_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < total_elements; i++) + { + if (result_data[i] != expected_value) + { + log_error("Data failed to verify at index %zu. " + "Expected %u, result was %u\n", + i, expected_value, result_data[i]); + return false; + } + } + return true; + } + + cl_int Run() override + { + const char *global_size_kernel = + R"( + __kernel void three_dim(__global uint *dst0, + __global uint *dst1, + __global uint *dst2) + { + size_t gid = get_global_linear_id(); + dst0[gid] = get_global_size(0); + dst1[gid] = get_global_size(1); + dst2[gid] = get_global_size(2); + })"; + + cl_int error = create_single_kernel_helper( + context, &program, &kernel, 1, &global_size_kernel, "three_dim"); + test_error(error, "Creating kernel failed"); + + // Create a buffer for each of the three dimensions to write the + // global size into. 
+ clMemWrapper stream1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + alloc_size, nullptr, &error); + test_error(error, "Creating test array failed"); + + clMemWrapper stream2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + alloc_size, nullptr, &error); + test_error(error, "Creating test array failed"); + + clMemWrapper stream3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + alloc_size, nullptr, &error); + test_error(error, "Creating test array failed"); + + // Set the arguments + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream1); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream2); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &stream3); + test_error(error, "Unable to set indexed kernel arguments"); + + // Command-buffer contains a single kernel + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, work_dim, nullptr, + global_size_3D.data(), nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Enqueue command-buffer and wait on completion + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + // Verify results before any update + if (!Verify(stream1, global_size_3D[0], original_total_elements)) + { + return TEST_FAIL; + } + if (!Verify(stream2, global_size_3D[1], original_total_elements)) + { + return TEST_FAIL; + } + if (!Verify(stream3, global_size_3D[2], original_total_elements)) + { + return TEST_FAIL; + } + + // Update command with a mutable config where we use a different 3D + // global size, but hardcode `work_dim` to 3 (the original value). 
+ cl_mutable_dispatch_config_khr dispatch_config{ + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + work_dim /* work_dim */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + update_global_size_3D.data() /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_uint num_configs = 1; + cl_command_buffer_update_type_khr config_types[1] = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR + }; + const void *configs[1] = { &dispatch_config }; + error = clUpdateMutableCommandsKHR(command_buffer, num_configs, + config_types, configs); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + // Enqueue updated command-buffer + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Verify update is reflected in buffer output. + if (!Verify(stream1, update_global_size_3D[0], update_total_elements)) + { + return TEST_FAIL; + } + if (!Verify(stream2, update_global_size_3D[1], update_total_elements)) + { + return TEST_FAIL; + } + if (!Verify(stream3, update_global_size_3D[2], update_total_elements)) + { + return TEST_FAIL; + } + + return CL_SUCCESS; + } + + static const cl_uint work_dim = 3; + // 3D global size of kernel command when created + static const size_t original_elements = 2; + static constexpr std::array global_size_3D = { + original_elements, original_elements, original_elements + }; + // 3D global size to update kernel command to. 
+ static const size_t update_elements = 4; + static constexpr std::array update_global_size_3D = { + update_elements, update_elements, update_elements + }; + // Total number of work items in original and updated grids + static const size_t original_total_elements = + original_elements * original_elements * original_elements; + static const size_t update_total_elements = + update_elements * update_elements * update_elements; + // Size in bytes of each of the 3 cl_mem buffers (using the larger size) + static const size_t alloc_size = update_total_elements * sizeof(cl_uint); + + cl_mutable_command_khr command = nullptr; + std::vector result_data; +}; + +// get_global_linear() used in kernel is an OpenCL 2.0 API +REGISTER_TEST_VERSION(mutable_dispatch_work_dim, Version(2, 0)) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp index 88ea906b..add0a531 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp @@ -250,9 +250,6 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest &trigger_event, &execute_event); test_error(error, "clEnqueueCommandBufferKHR failed"); - // verify pending state - error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR); - // execute command buffer cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE); diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp index bb59118d..ae14b87b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp +++ 
b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp @@ -124,8 +124,6 @@ struct EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState error = EnqueueCommandBuffer(); test_error(error, "EnqueueCommandBuffer failed"); - error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR); - test_error(error, "State is not Pending"); return CL_SUCCESS; } diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_finalize.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_finalize.cpp index 8d00ca86..05f43506 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_finalize.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_finalize.cpp @@ -89,8 +89,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest error = EnqueueCommandBuffer(); test_error(error, "EnqueueCommandBuffer failed"); - error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR); - test_error(error, "State is not Pending"); error = clFinalizeCommandBufferKHR(command_buffer); test_failure_error_ret(error, CL_INVALID_OPERATION, diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/CMakeLists.txt b/test_conformance/extensions/cl_khr_external_memory_ahb/CMakeLists.txt new file mode 100644 index 00000000..54ace3cf --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CL_KHR_EXTERNAL_MEMORY_AHB) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_ahb.cpp + test_ahb_negative.cpp + debug_ahb.cpp +) + +link_libraries(OpenCL nativewindow) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp new file mode 100644 index 00000000..e0ca6615 --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.cpp @@ -0,0 
+1,193 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "debug_ahb.h" + +constexpr AHardwareBuffer_UsageFlags flag_list[] = { + AHARDWAREBUFFER_USAGE_CPU_READ_RARELY, + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, + AHARDWAREBUFFER_USAGE_CPU_WRITE_NEVER, + AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY, + AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, + AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK, + AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE, + AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER, + AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY, + AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT, + AHARDWAREBUFFER_USAGE_VIDEO_ENCODE, + AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA, + AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER, + AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP, + AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE, + AHARDWAREBUFFER_USAGE_FRONT_BUFFER, +}; + +std::string +ahardwareBufferDecodeUsageFlagsToString(const AHardwareBuffer_UsageFlags flags) +{ + if (flags == 0) + { + return "UNKNOWN FLAG"; + } + + std::vector active_flags; + for (const auto flag : flag_list) + { + if (flag & flags) + { + active_flags.push_back(ahardwareBufferUsageFlagToString(flag)); + } + } + + if (active_flags.empty()) + { + return "UNKNOWN FLAG"; + } + + return std::accumulate(active_flags.begin() + 1, active_flags.end(), + active_flags.front(), + [](std::string acc, const std::string& flag) { + return std::move(acc) + "|" + flag; + }); +} + +std::string 
+ahardwareBufferUsageFlagToString(const AHardwareBuffer_UsageFlags flag) +{ + std::string result; + switch (flag) + { + case AHARDWAREBUFFER_USAGE_CPU_READ_NEVER: + result = "AHARDWAREBUFFER_USAGE_CPU_READ_NEVER"; + break; + case AHARDWAREBUFFER_USAGE_CPU_READ_RARELY: + result = "AHARDWAREBUFFER_USAGE_CPU_READ_RARELY"; + break; + case AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN: + result = "AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN"; + break; + case AHARDWAREBUFFER_USAGE_CPU_READ_MASK: + result = "AHARDWAREBUFFER_USAGE_CPU_READ_MASK"; + break; + case AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY: + result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY"; + break; + case AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN: + result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN"; + break; + case AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK: + result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK"; + break; + case AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE: + result = "AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE"; + break; + case AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER: + result = "AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER"; + break; + case AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY: + result = "AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY"; + break; + case AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT: + result = "AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT"; + break; + case AHARDWAREBUFFER_USAGE_VIDEO_ENCODE: + result = "AHARDWAREBUFFER_USAGE_VIDEO_ENCODE"; + break; + case AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA: + result = "AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA"; + break; + case AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER: + result = "AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER"; + break; + case AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP: + result = "AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP"; + break; + case AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE: + result = "AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE"; + break; + default: result = "Unknown flag"; + } + return result; +} + +std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format) +{ + std::string result; + 
switch (format) + { + case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT: + result = "AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT"; + break; + case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_BLOB: + result = "AHARDWAREBUFFER_FORMAT_BLOB"; + break; + case AHARDWAREBUFFER_FORMAT_D16_UNORM: + result = "AHARDWAREBUFFER_FORMAT_D16_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_D24_UNORM: + result = "AHARDWAREBUFFER_FORMAT_D24_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT: + result = "AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT"; + break; + case AHARDWAREBUFFER_FORMAT_D32_FLOAT: + result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT"; + break; + case AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT: + result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT"; + break; + case AHARDWAREBUFFER_FORMAT_S8_UINT: + result = "AHARDWAREBUFFER_FORMAT_S8_UINT"; + break; + case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420: + result = "AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420"; + break; + case AHARDWAREBUFFER_FORMAT_YCbCr_P010: + result = "AHARDWAREBUFFER_FORMAT_YCbCr_P010"; + break; + case AHARDWAREBUFFER_FORMAT_YCbCr_P210: + result = "AHARDWAREBUFFER_FORMAT_YCbCr_P210"; + break; + case AHARDWAREBUFFER_FORMAT_R8_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R8_UNORM"; + break; + case AHARDWAREBUFFER_FORMAT_R16_UINT: + result = "AHARDWAREBUFFER_FORMAT_R16_UINT"; + break; + case AHARDWAREBUFFER_FORMAT_R16G16_UINT: + result = "AHARDWAREBUFFER_FORMAT_R16G16_UINT"; + break; + case 
AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM: + result = "AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM"; + break; + } + return result; +} diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.h b/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.h new file mode 100644 index 00000000..037bc6c2 --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/debug_ahb.h @@ -0,0 +1,42 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#pragma once + +#include +#include +#include +#include + +#define CHECK_AHARDWARE_BUFFER_SUPPORT(ahardwareBuffer_Desc, format) \ + if (!AHardwareBuffer_isSupported(&ahardwareBuffer_Desc)) \ + { \ + const std::string usage_string = \ + ahardwareBufferDecodeUsageFlagsToString( \ + static_cast( \ + ahardwareBuffer_Desc.usage)); \ + log_info("Unsupported format %s:\n Usage flags %s\n Size (%u, " \ + "%u, layers = %u)\n", \ + ahardwareBufferFormatToString(format.aHardwareBufferFormat) \ + .c_str(), \ + usage_string.c_str(), ahardwareBuffer_Desc.width, \ + ahardwareBuffer_Desc.height, ahardwareBuffer_Desc.layers); \ + continue; \ + } + +std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format); +std::string ahardwareBufferUsageFlagToString(AHardwareBuffer_UsageFlags flag); +std::string +ahardwareBufferDecodeUsageFlagsToString(AHardwareBuffer_UsageFlags flags); \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp new file mode 100644 index 00000000..8900e6ff --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp @@ -0,0 +1,23 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "harness/testHarness.h" + +int main(int argc, const char *argv[]) +{ + return runTestHarness(argc, argv, test_registry::getInstance().num_tests(), + test_registry::getInstance().definitions(), false, 0); +} \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp new file mode 100644 index 00000000..f0747d0e --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp @@ -0,0 +1,1911 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "harness/compat.h" +#include "harness/kernelHelpers.h" +#include "harness/imageHelpers.h" +#include "harness/typeWrappers.h" +#include "harness/errorHelpers.h" +#include "harness/extensionHelpers.h" +#include +#include "debug_ahb.h" + +static bool isAHBUsageReadable(const AHardwareBuffer_UsageFlags usage) +{ + return (AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE & usage) != 0; +} + +struct ahb_format_table +{ + AHardwareBuffer_Format aHardwareBufferFormat; + cl_image_format clImageFormat; + cl_mem_object_type clMemObjectType; +}; + +struct ahb_usage_table +{ + AHardwareBuffer_UsageFlags usageFlags; +}; + +struct ahb_image_size_table +{ + uint32_t width; + uint32_t height; +}; + +ahb_image_size_table test_sizes[] = { + { 64, 64 }, { 128, 128 }, { 256, 256 }, { 512, 512 } +}; + +ahb_usage_table test_usages[] = { + { static_cast( + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN + | AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE + | AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER) }, + { static_cast( + AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE + | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN) }, + { static_cast( + AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER + | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN) }, +}; + +ahb_format_table test_formats[] = { + { AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT, + { CL_RGBA, CL_HALF_FLOAT }, + CL_MEM_OBJECT_IMAGE2D }, + { AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM, + { CL_RGBA, CL_UNORM_INT8 }, + CL_MEM_OBJECT_IMAGE2D }, + + { AHARDWAREBUFFER_FORMAT_R8_UNORM, + { CL_R, CL_UNORM_INT8 }, + CL_MEM_OBJECT_IMAGE2D }, +}; + +static const char *diff_images_kernel_source = { + R"( + #define PIXEL_FORMAT %s4 + __kernel void verify_image( read_only image2d_t ahb_image , read_only image2d_t ocl_image, global PIXEL_FORMAT *ocl_pixel, global PIXEL_FORMAT *ahb_pixel) + { + int tidX = get_global_id(0); + int tidY = get_global_id(1); + int idx = tidY * 
get_global_size(0) + tidX; + + sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + PIXEL_FORMAT a = read_image%s(ahb_image, sampler, (int2)( tidX, tidY ) ); + PIXEL_FORMAT o = read_image%s(ocl_image, sampler, (int2)( tidX, tidY ) ); + ahb_pixel[idx] = a; + ocl_pixel[idx] = o; + })" +}; + +// Checks that the inferred image format is correct +REGISTER_TEST(test_images) +{ + cl_int err = CL_SUCCESS; + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + for (const auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + const cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + cl_mem image = clCreateImageWithProperties( + context, props, CL_MEM_READ_WRITE, nullptr, nullptr, + nullptr, 
&err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + cl_image_format imageFormat = { 0 }; + err = clGetImageInfo(image, CL_IMAGE_FORMAT, + sizeof(cl_image_format), &imageFormat, + nullptr); + test_error(err, "Failed to query image format"); + + if (imageFormat.image_channel_order + != format.clImageFormat.image_channel_order) + { + log_error("Expected channel order %d, got %d\n", + format.clImageFormat.image_channel_order, + imageFormat.image_channel_order); + return TEST_FAIL; + } + + if (imageFormat.image_channel_data_type + != format.clImageFormat.image_channel_data_type) + { + log_error("Expected image_channel_data_type %d, got %d\n", + format.clImageFormat.image_channel_data_type, + imageFormat.image_channel_data_type); + return TEST_FAIL; + } + + test_error(clReleaseMemObject(image), + "Failed to release image"); + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_images_read) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. 
" + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + // Populate AHB with random data + size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = hardware_buffer_desc.stride * pixelSize; + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + size_t 
srcBytes = get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + memcpy(hardware_buffer_data, srcData, srcBytes); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + cl_image_desc imageDesc = { 0 }; + imageDesc.image_type = imageInfo.type; + imageDesc.image_width = imageInfo.width; + imageDesc.image_height = imageInfo.height; + imageDesc.image_row_pitch = imageInfo.rowPitch; + + clMemWrapper opencl_image = clCreateImage( + context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + imageInfo.format, &imageDesc, srcData, &err); + test_error(err, "Failed to create CL image"); + + ExplicitTypes outputType; + const char *readFormat; + + if (format.clImageFormat.image_channel_data_type + == CL_UNSIGNED_INT8) + { + readFormat = "ui"; + outputType = kUInt; + } + else + { + readFormat = "f"; + outputType = kFloat; + } + + size_t verify_buffer_size = imageInfo.width * imageInfo.height + * get_explicit_type_size(outputType) * 4; + + clMemWrapper ocl_pixel_buffer = + clCreateBuffer(context, CL_MEM_READ_WRITE, + verify_buffer_size, nullptr, &err); + test_error(err, "Failed to create ocl 
pixel buffer"); + + clMemWrapper ahb_pixel_buffer = + clCreateBuffer(context, CL_MEM_READ_WRITE, + verify_buffer_size, nullptr, &err); + test_error(err, "Failed to crete ahb pixel buffer"); + + // Populate kernel + std::vector programSrc( + 2 * strlen(diff_images_kernel_source)); + const char *outputTypeName = get_explicit_type_name(outputType); + + sprintf(programSrc.data(), diff_images_kernel_source, + outputTypeName, // Read image format 1 + readFormat, // Read image return type 1 + readFormat // Read image return type 2 + ); + const char *ptr = programSrc.data(); + clProgramWrapper program; + clKernelWrapper kernel; + err = create_single_kernel_helper(context, &program, &kernel, 1, + &ptr, "verify_image"); + + // Set kernel args + + err = + clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_image); + test_error(err, "clSetKernelArg failed"); + + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &opencl_image); + test_error(err, "clSetKernelArg failed"); + + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), + &ocl_pixel_buffer); + test_error(err, "clSetKernelArg failed"); + + err = clSetKernelArg(kernel, 3, sizeof(cl_mem), + &ahb_pixel_buffer); + test_error(err, "clSetKernelArg failed"); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjectsKHR failed"); + + size_t global_work_size[] = { imageInfo.width, + imageInfo.height }; + err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, + global_work_size, nullptr, 0, + nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed"); + + err = clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &opencl_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed"); + + // Read buffer and verify + std::vector ocl_verify_data(verify_buffer_size); + err = clEnqueueReadBuffer( + queue, ocl_pixel_buffer, CL_BLOCKING, 0, verify_buffer_size, + ocl_verify_data.data(), 0, nullptr, nullptr); + 
test_error(err, "clEnqueueReadBuffer failed"); + + std::vector ahb_verify_data(verify_buffer_size); + err = clEnqueueReadBuffer( + queue, ahb_pixel_buffer, CL_BLOCKING, 0, verify_buffer_size, + ahb_verify_data.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + for (unsigned row = 0; row < imageInfo.height; row++) + { + for (unsigned col = 0; col < imageInfo.width; col++) + { + unsigned pixel_index = row * imageInfo.width + col; + switch (outputType) + { + case kFloat: { + auto *cl_ptr = reinterpret_cast( + ocl_verify_data.data()); + auto *ahb_ptr = reinterpret_cast( + ahb_verify_data.data()); + + if ((cl_ptr[pixel_index].s0 + != ahb_ptr[pixel_index].s0) + || (cl_ptr[pixel_index].s1 + != ahb_ptr[pixel_index].s1) + || (cl_ptr[pixel_index].s2 + != ahb_ptr[pixel_index].s2) + || (cl_ptr[pixel_index].s3 + != ahb_ptr[pixel_index].s3)) + { + log_error( + "At coord (%u, %u) expected " + "(%f,%f,%f,%f), got (%f,%f,%f,%f)", + col, row, cl_ptr[pixel_index].s0, + cl_ptr[pixel_index].s1, + cl_ptr[pixel_index].s2, + cl_ptr[pixel_index].s3, + ahb_ptr[pixel_index].s0, + ahb_ptr[pixel_index].s1, + ahb_ptr[pixel_index].s2, + ahb_ptr[pixel_index].s3); + + return TEST_FAIL; + } + } + break; + case kUInt: { + auto *cl_ptr = reinterpret_cast( + ocl_verify_data.data()); + auto *ahb_ptr = reinterpret_cast( + ahb_verify_data.data()); + + if ((cl_ptr[pixel_index].s0 + != ahb_ptr[pixel_index].s0) + || (cl_ptr[pixel_index].s1 + != ahb_ptr[pixel_index].s1) + || (cl_ptr[pixel_index].s2 + != ahb_ptr[pixel_index].s2) + || (cl_ptr[pixel_index].s3 + != ahb_ptr[pixel_index].s3)) + { + log_error( + "At coord (%u, %u) expected " + "(%u,%u,%u,%u), got (%u,%u,%u,%u)", + col, row, cl_ptr[pixel_index].s0, + cl_ptr[pixel_index].s1, + cl_ptr[pixel_index].s2, + cl_ptr[pixel_index].s3, + ahb_ptr[pixel_index].s0, + ahb_ptr[pixel_index].s1, + ahb_ptr[pixel_index].s2, + ahb_ptr[pixel_index].s3); + return TEST_FAIL; + } + } + break; + default: test_fail("Unknown output type"); + } + 
} + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_read_image) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB 
has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + // Populate AHB with random data + const size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = hardware_buffer_desc.stride * pixelSize; + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + size_t srcBytes = get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + memcpy(hardware_buffer_data, srcData, srcBytes); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + const cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjectsKHR failed"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + + std::vector 
out_image(srcBytes); + err = clEnqueueReadImage(queue, imported_image, CL_TRUE, origin, + region, imageInfo.rowPitch, 0, + out_image.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueCopyImage failed"); + + err = clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed"); + + const char *out_image_ptr = out_image.data(); + auto srcData_ptr = static_cast(srcData); + + const size_t scanlineSize = + imageInfo.width * get_pixel_size(imageInfo.format); + + // Count the number of bytes successfully matched + size_t total_matched = 0; + for (size_t line = 0; line < imageInfo.height; line++) + { + + if (memcmp(srcData_ptr, out_image_ptr, scanlineSize) != 0) + { + // Find the first differing pixel + const size_t pixel_size = + get_pixel_size(imageInfo.format); + size_t where = compare_scanlines( + &imageInfo, srcData_ptr, out_image_ptr); + if (where < imageInfo.width) + { + print_first_pixel_difference_error( + where, srcData_ptr + pixel_size * where, + out_image_ptr + pixel_size * where, &imageInfo, + line, 1); + return TEST_FAIL; + } + } + + total_matched += scanlineSize; + srcData_ptr += imageInfo.rowPitch; + out_image_ptr += imageInfo.rowPitch; + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + if (total_matched == 0) + { + test_fail("Zero bytes matched"); + } + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_copy_image) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. 
" + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + // Populate AHB with random data + size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = resolution.width * pixelSize; + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + size_t srcBytes = 
get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + memcpy(hardware_buffer_data, srcData, srcBytes); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + cl_image_desc imageDesc = { 0 }; + imageDesc.image_type = imageInfo.type; + imageDesc.image_width = imageInfo.width; + imageDesc.image_height = imageInfo.height; + + clMemWrapper opencl_image = + clCreateImage(context, CL_MEM_READ_WRITE, imageInfo.format, + &imageDesc, nullptr, &err); + test_error(err, "Failed to create CL image"); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjectsKHR failed"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + err = clEnqueueCopyImage(queue, imported_image, opencl_image, + origin, origin, region, 0, nullptr, + nullptr); + test_error(err, "Failed calling clEnqueueCopyImage"); + + ExplicitTypes outputType; + const char *readFormat; + + if (format.clImageFormat.image_channel_data_type + == CL_UNSIGNED_INT8) + { + readFormat = "ui"; + 
outputType = kUInt; + } + else + { + readFormat = "f"; + outputType = kFloat; + } + + size_t verify_buffer_size = imageInfo.width * imageInfo.height + * get_explicit_type_size(outputType) * 4; + + clMemWrapper ocl_pixel_buffer = + clCreateBuffer(context, CL_MEM_READ_WRITE, + verify_buffer_size, nullptr, &err); + test_error(err, "Failed to create ocl pixel buffer"); + + clMemWrapper ahb_pixel_buffer = + clCreateBuffer(context, CL_MEM_READ_WRITE, + verify_buffer_size, nullptr, &err); + test_error(err, "Failed to crete ahb pixel buffer"); + + // sprintf the kernel + std::vector programSrc( + 2 * strlen(diff_images_kernel_source)); + const char *outputTypeName = get_explicit_type_name(outputType); + + sprintf(programSrc.data(), diff_images_kernel_source, + outputTypeName, /*read image format 1 */ + readFormat, /*read image return type 1 */ + readFormat /*read image return type 2 */ + ); + const char *ptr = programSrc.data(); + clProgramWrapper program; + clKernelWrapper kernel; + err = create_single_kernel_helper(context, &program, &kernel, 1, + &ptr, "verify_image"); + + // set kernel args + + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), + &imported_image); /*imported image */ + test_error(err, "clSetKernelArg failed"); + + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), + &opencl_image); /*image made in opencl*/ + test_error(err, "clSetKernelArg failed"); + + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), + &ocl_pixel_buffer); /*verification buffer*/ + test_error(err, "clSetKernelArg failed"); + + err = clSetKernelArg(kernel, 3, sizeof(cl_mem), + &ahb_pixel_buffer); /*verification buffer*/ + test_error(err, "clSetKernelArg failed"); + + size_t global_work_size[] = { (imageInfo.width), + (imageInfo.height) }; + err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, + global_work_size, nullptr, 0, + nullptr, nullptr); + + err = clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, 
"clEnqueueReleaseExternalMemObjectsKHR failed"); + + // Read buffer and verify + std::vector ocl_verify_data(verify_buffer_size); + err = clEnqueueReadBuffer( + queue, ocl_pixel_buffer, CL_BLOCKING, 0, verify_buffer_size, + ocl_verify_data.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + std::vector ahb_verify_data(verify_buffer_size); + err = clEnqueueReadBuffer( + queue, ahb_pixel_buffer, CL_BLOCKING, 0, verify_buffer_size, + ahb_verify_data.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + for (unsigned row = 0; row < imageInfo.height; row++) + { + for (unsigned col = 0; col < imageInfo.width; col++) + { + unsigned pixel_index = row * imageInfo.width + col; + switch (outputType) + { + case kFloat: { + auto *cl_ptr = reinterpret_cast( + ocl_verify_data.data()); + auto *ahb_ptr = reinterpret_cast( + ahb_verify_data.data()); + + if ((cl_ptr[pixel_index].s0 + != ahb_ptr[pixel_index].s0) + || (cl_ptr[pixel_index].s1 + != ahb_ptr[pixel_index].s1) + || (cl_ptr[pixel_index].s2 + != ahb_ptr[pixel_index].s2) + || (cl_ptr[pixel_index].s3 + != ahb_ptr[pixel_index].s3)) + { + printf("At %u\n", pixel_index); + printf("Expected %f,%f,%f,%f\n", + cl_ptr[pixel_index].s0, + cl_ptr[pixel_index].s1, + cl_ptr[pixel_index].s2, + cl_ptr[pixel_index].s3); + printf("Got %f,%f,%f,%f\n", + ahb_ptr[pixel_index].s0, + ahb_ptr[pixel_index].s1, + ahb_ptr[pixel_index].s2, + ahb_ptr[pixel_index].s3); + + return TEST_FAIL; + } + } + break; + case kUInt: { + auto *cl_ptr = reinterpret_cast( + ocl_verify_data.data()); + auto *ahb_ptr = reinterpret_cast( + ahb_verify_data.data()); + + if ((cl_ptr[pixel_index].s0 + != ahb_ptr[pixel_index].s0) + || (cl_ptr[pixel_index].s1 + != ahb_ptr[pixel_index].s1) + || (cl_ptr[pixel_index].s2 + != ahb_ptr[pixel_index].s2) + || (cl_ptr[pixel_index].s3 + != ahb_ptr[pixel_index].s3)) + { + printf("At %u\n", pixel_index); + printf("Expected %u,%u,%u,%u\n", + cl_ptr[pixel_index].s0, + 
cl_ptr[pixel_index].s1, + cl_ptr[pixel_index].s2, + cl_ptr[pixel_index].s3); + printf("Got %u,%u,%u,%u\n", + ahb_ptr[pixel_index].s0, + ahb_ptr[pixel_index].s1, + ahb_ptr[pixel_index].s2, + ahb_ptr[pixel_index].s3); + + return TEST_FAIL; + } + } + break; + default: test_fail("Unknown output type"); + } + } + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_copy_image_to_buffer) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + 
log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + // Populate AHB with random data + size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = hardware_buffer_desc.stride * pixelSize; + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + size_t srcBytes = get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + memcpy(hardware_buffer_data, srcData, srcBytes); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL 
image from AHardwareBuffer"); + + clMemWrapper opencl_buffer = clCreateBuffer( + context, CL_MEM_READ_WRITE, srcBytes, nullptr, &err); + test_error(err, "Failed to create CL buffer"); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjectsKHR failed"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + + err = clEnqueueCopyImageToBuffer(queue, imported_image, + opencl_buffer, origin, region, + 0, 0, nullptr, nullptr); + test_error( + err, "Failed to copy imported AHB image to opencl buffer"); + + err = clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueReleaseExternalMemObjectsKHR failed"); + + std::vector out_buffer(srcBytes); + err = clEnqueueReadBuffer(queue, opencl_buffer, CL_TRUE, 0, + srcBytes, out_buffer.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + char *out_buffer_ptr = out_buffer.data(); + auto srcData_ptr = static_cast(srcData); + + const size_t scanlineSize = + imageInfo.width * get_pixel_size(imageInfo.format); + + // Count the number of bytes successfully matched + size_t total_matched = 0; + for (size_t line = 0; line < imageInfo.height; line++) + { + + if (memcmp(srcData_ptr, out_buffer_ptr, scanlineSize) != 0) + { + // Find the first differing pixel + const size_t pixel_size = + get_pixel_size(imageInfo.format); + size_t where = compare_scanlines( + &imageInfo, srcData_ptr, out_buffer_ptr); + if (where < imageInfo.width) + { + print_first_pixel_difference_error( + where, srcData_ptr + pixel_size * where, + out_buffer_ptr + pixel_size * where, &imageInfo, + line, 1); + return TEST_FAIL; + } + } + + total_matched += scanlineSize; + srcData_ptr += imageInfo.rowPitch; + out_buffer_ptr += scanlineSize; + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + if (total_matched == 
0) + { + test_fail("Zero bytes matched"); + } + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_copy_buffer_to_image) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + 
test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + // Generate random data for opencl buffer + const size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = resolution.width * resolution.height + * pixelSize; // data is tightly packed in buffer + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + const size_t srcBytes = get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + clMemWrapper opencl_buffer = clCreateBuffer( + context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, srcBytes, + srcData, &err); + test_error(err, "Failed to create CL buffer"); + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_WRITE, nullptr, nullptr, + nullptr, &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjects failed"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + + err = clEnqueueCopyBufferToImage(queue, opencl_buffer, + imported_image, 0, origin, + region, 0, nullptr, nullptr); + test_error( + err, "Failed to copy opencl buffer to imported AHB image"); + + err = clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueReleaseExternalMemObjects failed"); + + 
clFinish(queue); + + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + auto out_image_ptr = static_cast(hardware_buffer_data); + auto srcData_ptr = static_cast(srcData); + + const size_t scanlineSize = + imageInfo.width * get_pixel_size(imageInfo.format); + + // Count the number of bytes successfully matched + size_t total_matched = 0; + for (size_t line = 0; line < imageInfo.height; line++) + { + + if (memcmp(srcData_ptr, out_image_ptr, scanlineSize) != 0) + { + // Find the first differing pixel + size_t where = compare_scanlines( + &imageInfo, srcData_ptr, out_image_ptr); + if (where < imageInfo.width) + { + print_first_pixel_difference_error( + where, srcData_ptr + pixelSize * where, + out_image_ptr + pixelSize * where, &imageInfo, + line, 1); + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, + nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with " + "code %d\n", + ahb_result); + return TEST_FAIL; + } + return TEST_FAIL; + } + } + + total_matched += scanlineSize; + srcData_ptr += + scanlineSize; // image data is tightly packed in buffer + out_image_ptr += hardware_buffer_desc.stride * pixelSize; + } + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + if (total_matched == 0) + { + test_fail("Zero bytes matched"); + } + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_write_image) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, 
"cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + 
}; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + // Generate data to write to image + const size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format = &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = resolution.width * resolution.height + * pixelSize; // Data is tightly packed + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + const size_t srcBytes = get_image_size(&imageInfo); + test_assert_error(srcBytes > 0, "Image cannot have zero size"); + + BufferOwningPtr srcData; + generate_random_image_data(&imageInfo, srcData, seed); + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjects failed"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + + err = clEnqueueWriteImage(queue, imported_image, CL_TRUE, + origin, region, 0, 0, srcData, 0, + nullptr, nullptr); + test_error(err, "Failed calling clEnqueueWriteImage"); + + err = clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clReleaseExternalMemObject failed"); + + clFinish(queue); + + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + auto out_image_ptr = static_cast(hardware_buffer_data); + auto 
srcData_ptr = static_cast(srcData); + + const size_t scanlineSize = + imageInfo.width * get_pixel_size(imageInfo.format); + + // Count the number of bytes successfully matched + size_t total_matched = 0; + for (size_t line = 0; line < imageInfo.height; line++) + { + + if (memcmp(srcData_ptr, out_image_ptr, scanlineSize) != 0) + { + // Find the first differing pixel + const size_t pixel_size = + get_pixel_size(imageInfo.format); + size_t where = compare_scanlines( + &imageInfo, srcData_ptr, out_image_ptr); + if (where < imageInfo.width) + { + print_first_pixel_difference_error( + where, srcData_ptr + pixel_size * where, + out_image_ptr + pixel_size * where, &imageInfo, + line, 1); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, + nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with " + "code %d\n", + ahb_result); + return TEST_FAIL; + } + return TEST_FAIL; + } + } + + total_matched += scanlineSize; + srcData_ptr += scanlineSize; // Data is tightly packed + out_image_ptr += hardware_buffer_desc.stride * pixelSize; + } + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + if (total_matched == 0) + { + test_fail("Zero bytes matched"); + } + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_enqueue_fill_image) +{ + cl_int err = CL_SUCCESS; + RandomSeed seed(gRandomSeed); + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. 
" + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + GET_PFN(device, clEnqueueAcquireExternalMemObjectsKHR); + GET_PFN(device, clEnqueueReleaseExternalMemObjectsKHR); + + for (auto format : test_formats) + { + log_info("Testing %s\n", + ahardwareBufferFormatToString(format.aHardwareBufferFormat) + .c_str()); + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = format.aHardwareBufferFormat; + for (auto usage : test_usages) + { + // Filter out usage flags that are not readable on device + if (!isAHBUsageReadable(usage.usageFlags)) + { + continue; + } + + aHardwareBufferDesc.usage = usage.usageFlags; + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width; + aHardwareBufferDesc.height = resolution.height; + aHardwareBufferDesc.layers = 1; + + CHECK_AHARDWARE_BUFFER_SUPPORT(aHardwareBufferDesc, format); + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = AHardwareBuffer_allocate(&aHardwareBufferDesc, + &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + // Determine AHB memory layout + AHardwareBuffer_Desc hardware_buffer_desc = {}; + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + test_assert_error(hardware_buffer_desc.width + == resolution.width, + "AHB has unexpected width"); + test_assert_error(hardware_buffer_desc.height + == resolution.height, + "AHB has unexpected height"); + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + clMemWrapper imported_image = clCreateImageWithProperties( + context, props, CL_MEM_READ_ONLY, nullptr, nullptr, nullptr, + &err); + test_error(err, + "Failed to create CL image from AHardwareBuffer"); + + // Create image info struct + size_t pixelSize = get_pixel_size(&format.clImageFormat); + image_descriptor imageInfo = { 0 }; + imageInfo.format 
= &format.clImageFormat; + imageInfo.type = format.clMemObjectType; + imageInfo.width = resolution.width; + imageInfo.height = resolution.height; + imageInfo.rowPitch = resolution.width * resolution.height + * pixelSize; // Data is tightly packed + test_assert_error(imageInfo.rowPitch + >= pixelSize * imageInfo.width, + "Row pitch is smaller than width"); + + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageInfo.width, imageInfo.height, 1 }; + + auto verificationValue = static_cast(malloc(pixelSize)); + if (!verificationValue) + { + log_error( + "Unable to malloc %zu bytes for verificationValue", + pixelSize); + return TEST_FAIL; + } + + err = clEnqueueAcquireExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueAcquireExternalMemObjects failed"); + + // Generate pixel color and fill image + switch (format.clImageFormat.image_channel_data_type) + { + case CL_HALF_FLOAT: + DetectFloatToHalfRoundingMode( + queue); // Intentional drop-through + case CL_UNORM_INT8: { + auto pattern_decimal = + static_cast(genrand_real1(seed)); + cl_float fillColor[4] = { pattern_decimal, + pattern_decimal, + pattern_decimal, + pattern_decimal }; + + err = clEnqueueFillImage(queue, imported_image, + fillColor, origin, region, 0, + nullptr, nullptr); + test_error(err, "Failed calling clEnqueueFillImage"); + + pack_image_pixel(fillColor, &format.clImageFormat, + verificationValue); + break; + } + case CL_UNSIGNED_INT16: { + const cl_uint pattern_whole = genrand_int32(seed); + cl_uint fillColor[4] = { pattern_whole, pattern_whole, + pattern_whole, pattern_whole }; + + err = clEnqueueFillImage(queue, imported_image, + fillColor, origin, region, 0, + nullptr, nullptr); + test_error(err, "Failed calling clEnqueueFillImage"); + + pack_image_pixel(fillColor, &format.clImageFormat, + verificationValue); + break; + } + default: + log_info("Unsupported image channel data type"); + continue; + } + + err = 
clEnqueueReleaseExternalMemObjectsKHR( + queue, 1, &imported_image, 0, nullptr, nullptr); + test_error(err, "clEnqueueReleaseExternalMemObjects failed"); + + clFinish(queue); + AHardwareBuffer_describe(aHardwareBuffer, + &hardware_buffer_desc); + + void *hardware_buffer_data = nullptr; + ahb_result = AHardwareBuffer_lock( + aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, + nullptr, &hardware_buffer_data); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_lock failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + auto out_image_ptr = static_cast(hardware_buffer_data); + const size_t scanlineSize = imageInfo.width * pixelSize; + + + auto verificationLine = + static_cast(malloc(pixelSize * scanlineSize)); + if (!verificationLine) + { + free(verificationValue); + log_error("Unable to malloc %zu bytes for verificationLine", + pixelSize * scanlineSize); + return TEST_FAIL; + } + char *index = verificationLine; + for (size_t x = 0; x < imageInfo.width; x++) + { + memcpy(index, verificationValue, pixelSize); + index += pixelSize; + } + + free(verificationValue); + + // Count the number of bytes successfully matched + size_t total_matched = 0; + for (size_t line = 0; line < imageInfo.height; line++) + { + + if (memcmp(verificationLine, out_image_ptr, scanlineSize) + != 0) + { + // Find the first differing pixel + const size_t pixel_size = + get_pixel_size(imageInfo.format); + size_t where = compare_scanlines( + &imageInfo, verificationLine, out_image_ptr); + if (where < imageInfo.width) + { + print_first_pixel_difference_error( + where, verificationLine + pixel_size * where, + out_image_ptr + pixel_size * where, &imageInfo, + line, 1); + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, + nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with " + "code %d\n", + ahb_result); + free(verificationLine); + return TEST_FAIL; + } + free(verificationLine); + return TEST_FAIL; + } + } + + total_matched += scanlineSize; 
+ out_image_ptr += hardware_buffer_desc.stride * pixelSize; + } + + ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_unlock failed with code %d\n", + ahb_result); + free(verificationLine); + return TEST_FAIL; + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + free(verificationLine); + + if (total_matched == 0) + { + test_fail("Zero bytes matched"); + } + } + } + } + + return TEST_PASS; +} + +REGISTER_TEST(test_blob) +{ + cl_int err = CL_SUCCESS; + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + log_info("Testing %s\n", + ahardwareBufferFormatToString(static_cast( + aHardwareBufferDesc.format)) + .c_str()); + + for (auto resolution : test_sizes) + { + aHardwareBufferDesc.width = resolution.width * resolution.height; + aHardwareBufferDesc.height = 1; + aHardwareBufferDesc.layers = 1; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) + { + std::string usage_string = ahardwareBufferDecodeUsageFlagsToString( + static_cast( + aHardwareBufferDesc.usage)); + log_info("Unsupported format %s, usage flags %s\n", + ahardwareBufferFormatToString( + static_cast( + aHardwareBufferDesc.format)) + .c_str(), + usage_string.c_str()); + continue; + } + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = + 
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", + ahb_result); + return TEST_FAIL; + } + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + cl_mem buffer = clCreateBufferWithProperties( + context, props, CL_MEM_READ_WRITE, 0, nullptr, &err); + test_error(err, "Failed to create CL buffer from AHardwareBuffer"); + + test_error(clReleaseMemObject(buffer), "Failed to release buffer"); + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + } + + return TEST_PASS; +} diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp new file mode 100644 index 00000000..ca010b58 --- /dev/null +++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb_negative.cpp @@ -0,0 +1,246 @@ +// +// Copyright (c) 2025 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "harness/compat.h" +#include "harness/kernelHelpers.h" +#include "harness/imageHelpers.h" +#include "harness/errorHelpers.h" +#include +#include "debug_ahb.h" + +REGISTER_TEST(test_buffer_format_negative) +{ + cl_int err = CL_SUCCESS; + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + aHardwareBufferDesc.width = 64; + aHardwareBufferDesc.height = 1; + aHardwareBufferDesc.layers = 1; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) + { + const std::string usage_string = + ahardwareBufferDecodeUsageFlagsToString( + static_cast( + aHardwareBufferDesc.usage)); + log_info( + "Unsupported format %s, usage flags %s\n", + ahardwareBufferFormatToString( + static_cast(aHardwareBufferDesc.format)) + .c_str(), + usage_string.c_str()); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer *aHardwareBuffer = nullptr; + const int ahb_result = + AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result); + return TEST_FAIL; + } + log_info("Testing %s\n", + ahardwareBufferFormatToString(static_cast( + aHardwareBufferDesc.format)) + .c_str()); + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + cl_mem buffer = 
clCreateBufferWithProperties( + context, props, CL_MEM_READ_WRITE, 0, nullptr, &err); + test_assert_error(err == CL_INVALID_OPERATION, + "To create a buffer the aHardwareFormat must be " + "AHARDWAREBUFFER_FORMAT_BLOB"); + + if (buffer != nullptr) + { + test_error(clReleaseMemObject(buffer), "Failed to release buffer"); + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + return TEST_PASS; +} + +REGISTER_TEST(test_buffer_size_negative) +{ + cl_int err = CL_SUCCESS; + constexpr size_t buffer_size = 64; + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + aHardwareBufferDesc.width = buffer_size; + aHardwareBufferDesc.height = 1; + aHardwareBufferDesc.layers = 1; + aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc)) + { + const std::string usage_string = + ahardwareBufferDecodeUsageFlagsToString( + static_cast( + aHardwareBufferDesc.usage)); + log_info( + "Unsupported format %s, usage flags %s\n", + ahardwareBufferFormatToString( + static_cast(aHardwareBufferDesc.format)) + .c_str(), + usage_string.c_str()); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer *aHardwareBuffer = nullptr; + const int ahb_result = + AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result); + return 
TEST_FAIL; + } + log_info("Testing %s\n", + ahardwareBufferFormatToString(static_cast( + aHardwareBufferDesc.format)) + .c_str()); + + cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + cl_mem buffer = clCreateBufferWithProperties( + context, props, CL_MEM_READ_WRITE, buffer_size / 2, nullptr, &err); + test_assert_error(err == CL_INVALID_BUFFER_SIZE, + "Wrong error value returned"); + + if (buffer != nullptr) + { + test_error(clReleaseMemObject(buffer), "Failed to release buffer"); + } + + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + return TEST_PASS; +} + +REGISTER_TEST(test_images_negative) +{ + cl_int err = CL_SUCCESS; + + if (!is_extension_available(device, "cl_khr_external_memory")) + { + log_info("cl_khr_external_memory is not supported on this platform. " + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + if (!is_extension_available( + device, "cl_khr_external_memory_android_hardware_buffer")) + { + log_info("cl_khr_external_memory_android_hardware_buffer is not " + "supported on this platform. 
" + "Skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + AHardwareBuffer_Desc aHardwareBufferDesc = { 0 }; + aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM; + aHardwareBufferDesc.usage = static_cast( + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN + | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN + | AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE + | AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER); + aHardwareBufferDesc.width = 64; + aHardwareBufferDesc.height = 64; + aHardwareBufferDesc.layers = 1; + + AHardwareBuffer *aHardwareBuffer = nullptr; + int ahb_result = + AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer); + if (ahb_result != 0) + { + log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result); + return TEST_FAIL; + } + + const cl_mem_properties props[] = { + CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR, + reinterpret_cast(aHardwareBuffer), 0 + }; + + constexpr cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 }; + cl_mem image = + clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE, + &image_format, nullptr, nullptr, &err); + test_assert_error(err == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Wrong error value returned"); + if (image != nullptr) + { + test_error(clReleaseMemObject(image), "Failed to release image"); + } + + constexpr cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, 64, 64 }; + image = clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE, + nullptr, &image_desc, nullptr, &err); + test_assert_error(err == CL_INVALID_IMAGE_DESCRIPTOR, + "Wrong error value returned"); + if (image != nullptr) + { + test_error(clReleaseMemObject(image), "Failed to release image"); + } + AHardwareBuffer_release(aHardwareBuffer); + aHardwareBuffer = nullptr; + + return TEST_PASS; +} diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp index 7611fbf3..198bf046 100644 --- 
a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp @@ -388,109 +388,6 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2)) return TEST_PASS; } -// Confirm that signal a semaphore with no event dependencies will not result -// in an implicit dependency on everything previously submitted -REGISTER_TEST_VERSION(external_semaphores_simple_2, Version(1, 2)) -{ - REQUIRE_EXTENSION("cl_khr_external_semaphore"); - - if (init_vulkan_device(1, &device)) - { - log_info("Cannot initialise Vulkan. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(device, clEnqueueSignalSemaphoresKHR); - GET_PFN(device, clEnqueueWaitSemaphoresKHR); - - std::vector - vkExternalSemaphoreHandleTypeList = - getSupportedInteropExternalSemaphoreHandleTypes(device, vkDevice); - - if (vkExternalSemaphoreHandleTypeList.empty()) - { - test_fail("No external semaphore handle types found\n"); - } - - for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType : - vkExternalSemaphoreHandleTypeList) - { - log_info_semaphore_type(vkExternalSemaphoreHandleType); - VulkanSemaphore vkVk2CLSemaphore(vkDevice, - vkExternalSemaphoreHandleType); - - auto sema_ext = clExternalImportableSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, device); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user event - clEventWrapper user_event = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Create Kernel - clProgramWrapper program; - clKernelWrapper kernel; - err = create_single_kernel_helper(context, &program, &kernel, 1, - &source, "empty"); - 
test_error(err, "Could not create kernel"); - - // Enqueue task_1 (dependency on user_event) - clEventWrapper task_1_event; - err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event); - test_error(err, "Could not enqueue task 1"); - - // Signal semaphore - clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 0, nullptr, &signal_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 0, nullptr, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - - cl_event event_list[] = { signal_event, wait_event }; - err = clWaitForEvents(2, event_list); - test_error(err, "Could not wait on events"); - - // Ensure all events are completed except for task_1 - test_assert_event_inprogress(task_1_event); - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - // Complete user_event - err = clSetUserEventStatus(user_event, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(task_1_event); - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - } - - return TEST_PASS; -} - // Confirm that a semaphore can be reused multiple times REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2)) { diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp index e3351cd8..ce146b41 100644 --- a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp +++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp @@ -76,87 +76,6 @@ struct 
SimpleSemaphore1 : public SemaphoreTestBase } }; -struct SimpleSemaphore2 : public SemaphoreTestBase -{ - SimpleSemaphore2(cl_device_id device, cl_context context, - cl_command_queue queue, cl_int nelems) - : SemaphoreTestBase(device, context, queue, nelems) - {} - - cl_int Run() override - { - cl_int err = CL_SUCCESS; - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphore - cl_semaphore_properties_khr sema_props[] = { - static_cast(CL_SEMAPHORE_TYPE_KHR), - static_cast( - CL_SEMAPHORE_TYPE_BINARY_KHR), - 0 - }; - semaphore = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - // Create user event - clEventWrapper user_event = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Create Kernel - clProgramWrapper program; - clKernelWrapper kernel; - err = create_single_kernel_helper(context, &program, &kernel, 1, - &source, "empty"); - test_error(err, "Could not create kernel"); - - // Enqueue task_1 (dependency on user_event) - clEventWrapper task_1_event; - err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event); - test_error(err, "Could not enqueue task 1"); - - // Signal semaphore - clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0, - nullptr, &signal_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 0, - nullptr, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are completed except for task_1 - 
test_assert_event_inprogress(task_1_event); - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - // Complete user_event - err = clSetUserEventStatus(user_event, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(task_1_event); - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - return CL_SUCCESS; - } -}; - struct SemaphoreReuse : public SemaphoreTestBase { SemaphoreReuse(cl_device_id device, cl_context context, @@ -387,14 +306,6 @@ REGISTER_TEST_VERSION(semaphores_simple_1, Version(1, 2)) num_elements); } -// Confirm that signal a semaphore with no event dependencies will not result -// in an implicit dependency on everything previously submitted -REGISTER_TEST_VERSION(semaphores_simple_2, Version(1, 2)) -{ - return MakeAndRunTest(device, context, queue, - num_elements); -} - // Confirm that a semaphore can be reused multiple times REGISTER_TEST_VERSION(semaphores_reuse, Version(1, 2)) { diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index ab7fe257..15b940f5 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -16,6 +16,7 @@ #include "harness/compat.h" #include "harness/kernelHelpers.h" #include "harness/testHarness.h" +#include "harness/parseParameters.h" #include diff --git a/test_conformance/half/cl_utils.cpp b/test_conformance/half/cl_utils.cpp index 4fd42c02..7090f435 100644 --- a/test_conformance/half/cl_utils.cpp +++ b/test_conformance/half/cl_utils.cpp @@ -58,7 +58,6 @@ uint32_t gDeviceFrequency = 0; uint32_t gComputeDevices = 0; size_t gMaxThreadGroupSize = 0; size_t gWorkGroupSize = 0; -bool gWimpyMode = false; int gWimpyReductionFactor = 512; int gTestDouble = 0; bool gHostReset = false; diff --git 
a/test_conformance/half/cl_utils.h b/test_conformance/half/cl_utils.h index da6073cf..d7754ebc 100644 --- a/test_conformance/half/cl_utils.h +++ b/test_conformance/half/cl_utils.h @@ -74,7 +74,6 @@ extern bool gHostReset; // gWimpyMode indicates if we run the test in wimpy mode where we limit the // size of 32 bit ranges to a much smaller set. This is meant to be used // as a smoke test -extern bool gWimpyMode; extern int gWimpyReductionFactor; uint64_t ReadTime( void ); diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp index 743bc45c..0beec938 100644 --- a/test_conformance/half/main.cpp +++ b/test_conformance/half/main.cpp @@ -83,13 +83,6 @@ int main (int argc, const char **argv ) if( (error = ParseArgs( argc, argv )) ) goto exit; - if (gIsEmbedded) { - vlog( "\tProfile: Embedded\n" ); - }else - { - vlog( "\tProfile: Full\n" ); - } - fflush( stdout ); error = runTestHarnessWithCheck( argCount, argList, test_registry::getInstance().num_tests(), @@ -114,6 +107,10 @@ exit: static int ParseArgs( int argc, const char **argv ) { + if (gListTests) + { + return 0; + } int i; argList = (const char **)calloc(argc, sizeof(char *)); if( NULL == argList ) @@ -181,9 +178,6 @@ static int ParseArgs( int argc, const char **argv ) case 'r': gHostReset = true; break; - case 'w': // Wimpy mode - gWimpyMode = true; - break; case '[': parseWimpyReductionFactor( arg, gWimpyReductionFactor); break; @@ -202,12 +196,6 @@ static int ParseArgs( int argc, const char **argv ) } } - if (getenv("CL_WIMPY_MODE")) { - vlog( "\n" ); - vlog( "*** Detected CL_WIMPY_MODE env ***\n" ); - gWimpyMode = 1; - } - PrintArch(); if( gWimpyMode ) { @@ -217,6 +205,16 @@ static int ParseArgs( int argc, const char **argv ) vlog( "*** It gives warm fuzzy feelings and then nevers calls. 
***\n\n" ); vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor); } + + if (gIsEmbedded) + { + vlog("\tProfile: Embedded\n"); + } + else + { + vlog("\tProfile: Full\n"); + } + return 0; } @@ -227,7 +225,6 @@ static void PrintUsage( void ) "supported)\n"); vlog("\t\t-t\tToggle reporting performance data.\n"); vlog("\t\t-r\tReset buffers on host instead of on device.\n"); - vlog("\t\t-w\tRun in wimpy mode\n"); vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is " "1-12, default factor(%u)\n", gWimpyReductionFactor); diff --git a/test_conformance/images/common.cpp b/test_conformance/images/common.cpp index 95845b72..8120e3ab 100644 --- a/test_conformance/images/common.cpp +++ b/test_conformance/images/common.cpp @@ -248,7 +248,7 @@ clMemWrapper create_image(cl_context context, cl_command_queue queue, cl_mem_flags buffer_flags = CL_MEM_READ_WRITE; if (enable_pitch) { - if (version.major() == 1) + if (version.get_major() == 1) { host_ptr = malloc(imageInfo->rowPitch); } diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index 245de3ac..26baed75 100644 --- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -878,18 +878,16 @@ int test_read_image(cl_context context, cl_command_queue queue, numTries, numClamped, true, lod); log_error("Step by step:\n"); - FloatPixel temp = - sample_image_pixel_float_offset( - imagePtr, imageInfo, - xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], - norm_offset_x, - norm_offset_y, - norm_offset_z, - imageSampler, tempOut, - 1 /*verbose*/, - &hasDenormals, lod); + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, imageSampler, + tempOut, 1 /*verbose*/, + &hasDenormals, lod); log_error( "\tulps: %2.2f (max " "allowed: 
%2.2f)\n\n", @@ -931,9 +929,6 @@ int test_read_image(cl_context context, cl_command_queue queue, // Validate float results float *resultPtr = (float *)(char *)resultValues; float expected[4], error = 0.0f; - float maxErr = get_max_relative_error( - imageInfo->format, imageSampler, image_type_3D, - CL_FILTER_LINEAR == imageSampler->filter_mode); for (size_t z = 0, j = 0; z < depth_lod; z++) { @@ -1242,26 +1237,25 @@ int test_read_image(cl_context context, cl_command_queue queue, j, numTries, numClamped, true, lod); log_error("Step by step:\n"); - FloatPixel temp = - sample_image_pixel_float_offset( - imagePtr, imageInfo, - xOffsetValues[j], - (num_dimensions > 1) - ? yOffsetValues[j] - : 0.0f, - image_type_3D - ? zOffsetValues[j] - : 0.0f, - norm_offset_x, - (num_dimensions > 1) - ? norm_offset_y - : 0.0f, - image_type_3D - ? norm_offset_z - : 0.0f, - imageSampler, tempOut, - 1 /*verbose*/, - &hasDenormals, lod); + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, + norm_offset_x, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, + imageSampler, tempOut, + 1 /*verbose*/, + &hasDenormals, lod); log_error( "\tulps: %2.2f, %2.2f, " "%2.2f, %2.2f (max " @@ -1632,26 +1626,25 @@ int test_read_image(cl_context context, cl_command_queue queue, j, numTries, numClamped, true, lod); log_error("Step by step:\n"); - FloatPixel temp = - sample_image_pixel_float_offset( - imagePtr, imageInfo, - xOffsetValues[j], - (num_dimensions > 1) - ? yOffsetValues[j] - : 0.0f, - image_type_3D - ? zOffsetValues[j] - : 0.0f, - norm_offset_x, - (num_dimensions > 1) - ? norm_offset_y - : 0.0f, - image_type_3D - ? norm_offset_z - : 0.0f, - imageSampler, tempOut, - 1 /*verbose*/, - &hasDenormals, lod); + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + (num_dimensions > 1) + ? 
yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, + norm_offset_x, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, + imageSampler, tempOut, + 1 /*verbose*/, + &hasDenormals, lod); log_error( "\tulps: %2.2f, %2.2f, " "%2.2f, %2.2f (max " diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp index 9c4e332a..5c7c7b29 100644 --- a/test_conformance/images/kernel_read_write/test_iterations.cpp +++ b/test_conformance/images/kernel_read_write/test_iterations.cpp @@ -945,7 +945,7 @@ int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double // Validate float results float *resultPtr = (float *)(char *)resultValues; float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + for( size_t y = 0, j = 0; y < height_lod; y++ ) { for( size_t x = 0; x < width_lod; x++, j++ ) diff --git a/test_conformance/integer_ops/CMakeLists.txt b/test_conformance/integer_ops/CMakeLists.txt index 9966ca6f..7bc991f8 100644 --- a/test_conformance/integer_ops/CMakeLists.txt +++ b/test_conformance/integer_ops/CMakeLists.txt @@ -1,9 +1,5 @@ set(MODULE_NAME INTEGER_OPS) -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - add_cxx_flag_if_supported(-Wno-narrowing) -endif() - set(${MODULE_NAME}_SOURCES main.cpp test_int_basic_ops.cpp diff --git a/test_conformance/integer_ops/main.cpp b/test_conformance/integer_ops/main.cpp index 32849af7..cd90d1bf 100644 --- a/test_conformance/integer_ops/main.cpp +++ b/test_conformance/integer_ops/main.cpp @@ -26,14 +26,32 @@ void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d ) { - static const cl_long sUniqueValues[] = { 0x3333333333333333LL, 0x5555555555555555LL, 0x9999999999999999LL, 0xaaaaaaaaaaaaaaaaLL, 
0xccccccccccccccccLL, - 0x3030303030303030LL, 0x5050505050505050LL, 0x9090909090909090LL, 0xa0a0a0a0a0a0a0a0LL, 0xc0c0c0c0c0c0c0c0LL, 0xf0f0f0f0f0f0f0f0LL, - 0x0303030303030303LL, 0x0505050505050505LL, 0x0909090909090909LL, 0x0a0a0a0a0a0a0a0aLL, 0x0c0c0c0c0c0c0c0cLL, 0x0f0f0f0f0f0f0f0fLL, - 0x3300330033003300LL, 0x5500550055005500LL, 0x9900990099009900LL, 0xaa00aa00aa00aa00LL, 0xcc00cc00cc00cc00LL, 0xff00ff00ff00ff00LL, - 0x0033003300330033LL, 0x0055005500550055LL, 0x0099009900990099LL, 0x00aa00aa00aa00aaLL, 0x00cc00cc00cc00ccLL, 0x00ff00ff00ff00ffLL, - 0x3333333300000000LL, 0x5555555500000000LL, 0x9999999900000000LL, 0xaaaaaaaa00000000LL, 0xcccccccc00000000LL, 0xffffffff00000000LL, - 0x0000000033333333LL, 0x0000000055555555LL, 0x0000000099999999LL, 0x00000000aaaaaaaaLL, 0x00000000ccccccccLL, 0x00000000ffffffffLL, - 0x3333000000003333LL, 0x5555000000005555LL, 0x9999000000009999LL, 0xaaaa00000000aaaaLL, 0xcccc00000000ccccLL, 0xffff00000000ffffLL}; + static const cl_long sUniqueValues[] = { + (cl_long)0x3333333333333333LL, (cl_long)0x5555555555555555LL, + (cl_long)0x9999999999999999LL, (cl_long)0xaaaaaaaaaaaaaaaaLL, + (cl_long)0xccccccccccccccccLL, (cl_long)0x3030303030303030LL, + (cl_long)0x5050505050505050LL, (cl_long)0x9090909090909090LL, + (cl_long)0xa0a0a0a0a0a0a0a0LL, (cl_long)0xc0c0c0c0c0c0c0c0LL, + (cl_long)0xf0f0f0f0f0f0f0f0LL, (cl_long)0x0303030303030303LL, + (cl_long)0x0505050505050505LL, (cl_long)0x0909090909090909LL, + (cl_long)0x0a0a0a0a0a0a0a0aLL, (cl_long)0x0c0c0c0c0c0c0c0cLL, + (cl_long)0x0f0f0f0f0f0f0f0fLL, (cl_long)0x3300330033003300LL, + (cl_long)0x5500550055005500LL, (cl_long)0x9900990099009900LL, + (cl_long)0xaa00aa00aa00aa00LL, (cl_long)0xcc00cc00cc00cc00LL, + (cl_long)0xff00ff00ff00ff00LL, (cl_long)0x0033003300330033LL, + (cl_long)0x0055005500550055LL, (cl_long)0x0099009900990099LL, + (cl_long)0x00aa00aa00aa00aaLL, (cl_long)0x00cc00cc00cc00ccLL, + (cl_long)0x00ff00ff00ff00ffLL, (cl_long)0x3333333300000000LL, + (cl_long)0x5555555500000000LL, 
(cl_long)0x9999999900000000LL, + (cl_long)0xaaaaaaaa00000000LL, (cl_long)0xcccccccc00000000LL, + (cl_long)0xffffffff00000000LL, (cl_long)0x0000000033333333LL, + (cl_long)0x0000000055555555LL, (cl_long)0x0000000099999999LL, + (cl_long)0x00000000aaaaaaaaLL, (cl_long)0x00000000ccccccccLL, + (cl_long)0x00000000ffffffffLL, (cl_long)0x3333000000003333LL, + (cl_long)0x5555000000005555LL, (cl_long)0x9999000000009999LL, + (cl_long)0xaaaa00000000aaaaLL, (cl_long)0xcccc00000000ccccLL, + (cl_long)0xffff00000000ffffLL + }; static cl_long sSpecialValues[ 128 + 128 + 128 + ( sizeof( sUniqueValues ) / sizeof( sUniqueValues[ 0 ] ) ) ] = { 0 }; if( sSpecialValues[ 0 ] == 0 ) diff --git a/test_conformance/integer_ops/test_int_basic_ops.cpp b/test_conformance/integer_ops/test_int_basic_ops.cpp index b9a47d75..aa48a6ba 100644 --- a/test_conformance/integer_ops/test_int_basic_ops.cpp +++ b/test_conformance/integer_ops/test_int_basic_ops.cpp @@ -22,6 +22,7 @@ #include "harness/conversions.h" #include "harness/ThreadPool.h" +#include "harness/parseParameters.h" #define NUM_TESTS 23 @@ -823,10 +824,10 @@ int run_specific_test(cl_device_id deviceID, cl_context context, cl_command_queu int run_multiple_tests(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int *tests, int total_tests) { int errors = 0; - if (getenv("CL_WIMPY_MODE") && num == LONG_MATH_SHIFT_SIZE) { - log_info("Detected CL_WIMPY_MODE env\n"); - log_info("Skipping long test\n"); - return 0; + if (gWimpyMode && num == LONG_MATH_SHIFT_SIZE) + { + log_info("Running in wimpy mode, skipping long test\n"); + return 0; } int i; diff --git a/test_conformance/integer_ops/verification_and_generation_functions.cpp b/test_conformance/integer_ops/verification_and_generation_functions.cpp index 9a7abf78..4262afb4 100644 --- a/test_conformance/integer_ops/verification_and_generation_functions.cpp +++ b/test_conformance/integer_ops/verification_and_generation_functions.cpp @@ 
-1370,7 +1370,9 @@ verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB void init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d) { - static const cl_ushort specialCaseList[] = { 0, -1, 1, CL_SHRT_MAX, CL_SHRT_MAX + 1, CL_USHRT_MAX }; + static const cl_ushort specialCaseList[] = { + 0, (cl_ushort)-1, 1, CL_SHRT_MAX, CL_SHRT_MAX + 1, CL_USHRT_MAX + }; int j; // Set the inputs to a random number @@ -1812,7 +1814,9 @@ verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, c void init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d) { - static const cl_uchar specialCaseList[] = { 0, -1, 1, CL_CHAR_MAX, CL_CHAR_MAX + 1, CL_UCHAR_MAX }; + static const cl_uchar specialCaseList[] = { + 0, (cl_uchar)-1, 1, CL_CHAR_MAX, CL_CHAR_MAX + 1, CL_UCHAR_MAX + }; int j; // FIXME: we really should just check every char against every char here diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 4428b422..d8c8ad5c 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -248,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } // Init input array - cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; + cl_double *p = (cl_double *)gIn + thread_id * buffer_elements; cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements; size_t idx = 0; int totalSpecialValueCount = specialValuesCount * specialValuesIntCount; @@ -257,7 +257,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // Test edge cases if (job_id <= (cl_uint)lastSpecialJobIndex) { - cl_double *fp = (cl_double *)p; cl_int *ip2 = (cl_int *)p2; uint32_t x, y; @@ -266,7 +265,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) for (; idx < buffer_elements; idx++) { - fp[idx] = specialValues[x]; + p[idx] = specialValues[x]; 
ip2[idx] = specialValuesInt[y]; if (++x >= specialValuesCount) { diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index cce6e122..17eb998f 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -754,10 +754,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } + bool correctlyRounded = strcmp(f->name, "divide_cr") == 0; + // Init the kernels BuildKernelInfo build_info{ test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, - relaxedMode }; + test_info.programs, f->nameInCode, + relaxedMode, correctlyRounded }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_two_results_i_half.cpp b/test_conformance/math_brute_force/binary_two_results_i_half.cpp index a2379431..0b48e33e 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_half.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_half.cpp @@ -260,7 +260,7 @@ int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode) if (t[j] == q[j] && t2[j] == q2[j]) continue; // Check for paired NaNs - if (IsHalfNaN(t[j]) && IsHalfNaN(q[j]) && t2[j] == q2[j]) + if (isnan_fp(t[j]) && isnan_fp(q[j]) && t2[j] == q2[j]) continue; cl_half test = ((cl_half *)q)[j]; @@ -282,7 +282,7 @@ int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode) // then the standard either neglects to say what is returned // in iptr or leaves it undefined or implementation defined. 
int iptrUndefined = IsHalfInfinity(p[j]) || (HTF(p2[j]) == 0.0f) - || IsHalfNaN(p2[j]) || IsHalfNaN(p[j]); + || isnan_fp(p2[j]) || isnan_fp(p[j]); if (iptrUndefined) iErr = 0; int fail = !(fabsf(err) <= half_ulps && iErr == 0); diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp index df45a700..257e2595 100644 --- a/test_conformance/math_brute_force/common.cpp +++ b/test_conformance/math_brute_force/common.cpp @@ -102,7 +102,7 @@ void EmitEnableExtension(std::ostringstream &kernel, if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; } -std::string GetBuildOptions(bool relaxed_mode) +std::string GetBuildOptions(const BuildKernelInfo &info) { std::ostringstream options; @@ -111,16 +111,16 @@ std::string GetBuildOptions(bool relaxed_mode) options << " -cl-denorms-are-zero"; } - if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) - { - options << " -cl-fp32-correctly-rounded-divide-sqrt"; - } - - if (relaxed_mode) + if (info.relaxedMode) { options << " -cl-fast-relaxed-math"; } + if (info.correctlyRounded) + { + options << " -cl-fp32-correctly-rounded-divide-sqrt"; + } + return options.str(); } @@ -581,7 +581,7 @@ cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id, // Create the program. clProgramWrapper &program = info.programs[vector_size_index]; - auto options = GetBuildOptions(info.relaxedMode); + auto options = GetBuildOptions(info); int error = create_single_kernel_helper(gContext, &program, nullptr, sources.size(), sources.data(), nullptr, options.c_str()); diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 3f89ef6c..d7e70a71 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -84,6 +84,9 @@ struct BuildKernelInfo // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; + + // Whether to build with -cl-fp32-correctly-rounded-divide-sqrt. 
+ bool correctlyRounded; }; // Data common to all math tests. diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp index 90731ea0..408a394a 100644 --- a/test_conformance/math_brute_force/function_list.cpp +++ b/test_conformance/math_brute_force/function_list.cpp @@ -375,8 +375,8 @@ const Func functionList[] = { { NULL }, 3.0f, 0.0f, - 0.0f, - 1.0f, + 1.5f, + 1.5f, 4.0f, INFINITY, INFINITY, diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index f0f2a4b6..a395488c 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -66,7 +66,6 @@ int gSkipCorrectnessTesting = 0; static int gStopOnError = 0; static bool gSkipRestOfTests; int gForceFTZ = 0; -int gWimpyMode = 0; int gHostFill = 0; static int gHasDouble = 0; static int gTestFloat = 1; @@ -82,7 +81,6 @@ static int gTestFastRelaxed = 1; OpenCL 2.0 spec then it has to be changed through a command line argument. */ int gFastRelaxedDerived = 1; -static int gToggleCorrectlyRoundedDivideSqrt = 0; int gHasHalf = 0; cl_device_fp_config gHalfCapabilities = 0; int gDeviceILogb0 = 1; @@ -385,21 +383,24 @@ int main(int argc, const char *argv[]) error = ParseArgs(argc, argv); if (error) return error; - // This takes a while, so prevent the machine from going to sleep. - PreventSleep(); - atexit(ResumeSleep); + if (!gListTests) + { + // This takes a while, so prevent the machine from going to sleep. + PreventSleep(); + atexit(ResumeSleep); - if (gSkipCorrectnessTesting) - vlog("*** Skipping correctness testing! ***\n\n"); - else if (gStopOnError) - vlog("Stopping at first error.\n"); + if (gSkipCorrectnessTesting) + vlog("*** Skipping correctness testing! 
***\n\n"); + else if (gStopOnError) + vlog("Stopping at first error.\n"); - vlog(" \t "); - if (gWimpyMode) vlog(" "); - if (!gSkipCorrectnessTesting) vlog("\t max_ulps"); + vlog(" \t "); + if (gWimpyMode) vlog(" "); + if (!gSkipCorrectnessTesting) vlog("\t max_ulps"); - vlog("\n-------------------------------------------------------------------" - "----------------------------------------\n"); + vlog("\n---------------------------------------------------------------" + "--------------------------------------------\n"); + } gMTdata = MTdataHolder(gRandomSeed); @@ -426,6 +427,10 @@ int main(int argc, const char *argv[]) static int ParseArgs(int argc, const char **argv) { + if (gListTests) + { + return 0; + } // We only pass test names to runTestHarnessWithCheck, hence global command // line options defined by the harness cannot be used by the user. // To respect the implementation details of runTestHarnessWithCheck, @@ -469,8 +474,6 @@ static int ParseArgs(int argc, const char **argv) optionFound = 1; switch (*arg) { - case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break; - case 'd': gHasDouble ^= 1; break; case 'e': gFastRelaxedDerived ^= 1; break; @@ -498,10 +501,6 @@ static int ParseArgs(int argc, const char **argv) case 'v': gVerboseBruteForce ^= 1; break; - case 'w': // wimpy mode - gWimpyMode ^= 1; - break; - case '[': parseWimpyReductionFactor(arg, gWimpyReductionFactor); break; @@ -581,14 +580,6 @@ static int ParseArgs(int argc, const char **argv) } } - // Check for the wimpy mode environment variable - if (getenv("CL_WIMPY_MODE")) - { - vlog("\n"); - vlog("*** Detected CL_WIMPY_MODE env ***\n"); - gWimpyMode = 1; - } - PrintArch(); if (gWimpyMode) @@ -629,8 +620,6 @@ static void PrintUsage(void) { vlog("%s [-cglsz]: \n", appName); vlog("\toptions:\n"); - vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: " - "off)\n"); vlog("\t\t-d\tToggle double precision testing. 
(Default: on iff khr_fp_64 " "on)\n"); vlog("\t\t-f\tToggle float precision testing. (Default: on)\n"); @@ -645,7 +634,6 @@ static void PrintUsage(void) "accuracy checks.)\n"); vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n"); vlog("\t\t-s\tStop on error\n"); - vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n"); vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is " "1-10, default factor(%u)\n", gWimpyReductionFactor); @@ -942,13 +930,6 @@ test_status InitCL(cl_device_id device) vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]); - if (gToggleCorrectlyRoundedDivideSqrt) - { - gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; - } - vlog("\tTesting with correctly rounded float divide and sqrt? %s\n", - no_yes[0 - != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]); vlog("\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]); vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]); @@ -1330,8 +1311,7 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place - int ulp_exp = - DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1); + int ulp_exp = DBL_MANT_DIG - std::max(ilogbl(reference), DBL_MIN_EXP); // allow correctly rounded results to pass through unmolested. (We might add // error to it below.) 
There is something of a performance optimization here diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 4d312c1e..a66e6f7e 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -25,12 +25,10 @@ #include "utility.h" -#if defined(__SSE__) \ - || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE__) || _M_IX86_FP == 1 #include #endif -#if defined(__SSE2__) \ - || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64) #include #endif @@ -721,9 +719,9 @@ double reference_tanpi(double x) double z = reference_fabs(x); // if big and even -- caution: only works if x only has single precision - if (z >= HEX_DBL(+, 1, 0, +, 24)) + if (!(z < HEX_DBL(+, 1, 0, +, 24))) { - if (z == INFINITY) return x - x; // nan + if (!isfinite(z)) return x - x; // nan return reference_copysign( 0.0, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n. 
@@ -855,8 +853,7 @@ double reference_add(double x, double y) volatile float a = (float)x; volatile float b = (float)y; -#if defined(__SSE__) \ - || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE__) || _M_IX86_FP == 1 // defeat x87 __m128 va = _mm_set_ss((float)a); __m128 vb = _mm_set_ss((float)b); @@ -953,8 +950,7 @@ double reference_subtract(double x, double y) { volatile float a = (float)x; volatile float b = (float)y; -#if defined(__SSE__) \ - || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE__) || _M_IX86_FP == 1 // defeat x87 __m128 va = _mm_set_ss((float)a); __m128 vb = _mm_set_ss((float)b); @@ -970,8 +966,7 @@ double reference_multiply(double x, double y) { volatile float a = (float)x; volatile float b = (float)y; -#if defined(__SSE__) \ - || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) +#if defined(__SSE__) || _M_IX86_FP == 1 // defeat x87 __m128 va = _mm_set_ss((float)a); __m128 vb = _mm_set_ss((float)b); @@ -1223,6 +1218,8 @@ double reference_relaxed_exp2(double x) { return reference_exp2(x); } double reference_exp2(double x) { // Note: only suitable for verifying single precision. Doesn't have range of a // full double exp2 implementation. + if (isnan(x)) return x; + if (x == 0.0) return 1.0; // separate x into fractional and integer parts @@ -2781,7 +2778,7 @@ static inline void shift_right_sticky_128(cl_ulong *hi, cl_ulong *lo, int shift) sticky |= (0 != l); l = 0; } - else + else if (shift > 0) { sticky |= (0 != (l << (64 - shift))); l >>= shift; @@ -3088,9 +3085,9 @@ long double reference_tanpil(long double x) long double z = reference_fabsl(x); // if big and even -- caution: only works if x only has single precision - if (z >= HEX_LDBL(+, 1, 0, +, 53)) + if (!(z < HEX_LDBL(+, 1, 0, +, 53))) { - if (z == INFINITY) return x - x; // nan + if (!isfinite(z)) return x - x; // nan return reference_copysignl( 0.0L, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n. 
@@ -5027,8 +5024,9 @@ static double reference_scalbn(double x, int n) u.d -= 1.0; e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022; } + if (n >= 2098) return reference_copysign(INFINITY, x); e += n; - if (e >= 2047 || n >= 2098) return reference_copysign(INFINITY, x); + if (e >= 2047) return reference_copysign(INFINITY, x); if (e < -51 || n < -2097) return reference_copysign(0.0, x); if (e <= 0) { diff --git a/test_conformance/math_brute_force/ternary_half.cpp b/test_conformance/math_brute_force/ternary_half.cpp index 843ceaa0..08c39900 100644 --- a/test_conformance/math_brute_force/ternary_half.cpp +++ b/test_conformance/math_brute_force/ternary_half.cpp @@ -274,10 +274,10 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) if (skipNanInf) { if (overflow[j] || IsHalfInfinity(correct) - || IsHalfNaN(correct) || IsHalfInfinity(hp0[j]) - || IsHalfNaN(hp0[j]) || IsHalfInfinity(hp1[j]) - || IsHalfNaN(hp1[j]) || IsHalfInfinity(hp2[j]) - || IsHalfNaN(hp2[j])) + || isnan_fp(correct) || IsHalfInfinity(hp0[j]) + || isnan_fp(hp0[j]) || IsHalfInfinity(hp1[j]) + || isnan_fp(hp1[j]) || IsHalfInfinity(hp2[j]) + || isnan_fp(hp2[j])) continue; } @@ -318,9 +318,9 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3)) + || isnan_fp(correct3)) continue; } @@ -381,13 +381,13 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. 
if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3) + || isnan_fp(correct3) || IsHalfInfinity(correct4) - || IsHalfNaN(correct4) + || isnan_fp(correct4) || IsHalfInfinity(correct5) - || IsHalfNaN(correct5)) + || isnan_fp(correct5)) continue; } @@ -474,13 +474,13 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3) + || isnan_fp(correct3) || IsHalfInfinity(correct4) - || IsHalfNaN(correct4) + || isnan_fp(correct4) || IsHalfInfinity(correct5) - || IsHalfNaN(correct5)) + || isnan_fp(correct5)) continue; } @@ -551,9 +551,9 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3)) + || isnan_fp(correct3)) continue; } @@ -613,13 +613,13 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3) + || isnan_fp(correct3) || IsHalfInfinity(correct4) - || IsHalfNaN(correct4) + || isnan_fp(correct4) || IsHalfInfinity(correct5) - || IsHalfNaN(correct5)) + || isnan_fp(correct5)) continue; } @@ -689,9 +689,9 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. 
if (IsHalfInfinity(correct2) - || IsHalfNaN(correct2) + || isnan_fp(correct2) || IsHalfInfinity(correct3) - || IsHalfNaN(correct3)) + || isnan_fp(correct3)) continue; } diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index ee8a61b8..2761ab97 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -563,10 +563,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) INFINITY; // out of range resut from finite inputs must be numeric } + bool correctlyRounded = strcmp(f->name, "sqrt_cr") == 0; + // Init the kernels BuildKernelInfo build_info{ test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, - relaxedMode }; + test_info.programs, f->nameInCode, + relaxedMode, correctlyRounded }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_half.cpp b/test_conformance/math_brute_force/unary_two_results_half.cpp index 683e1492..63398028 100644 --- a/test_conformance/math_brute_force/unary_two_results_half.cpp +++ b/test_conformance/math_brute_force/unary_two_results_half.cpp @@ -249,9 +249,9 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) if (skipNanInf && overflow[j]) continue; // Note: no double rounding here. Reference functions // calculate in single precision. - if (IsHalfInfinity(correct1) || IsHalfNaN(correct1) - || IsHalfInfinity(correct2) || IsHalfNaN(correct2) - || IsHalfInfinity(pIn[j]) || IsHalfNaN(pIn[j])) + if (IsHalfInfinity(correct1) || isnan_fp(correct1) + || IsHalfInfinity(correct2) || isnan_fp(correct2) + || IsHalfInfinity(pIn[j]) || isnan_fp(pIn[j])) continue; } @@ -320,13 +320,13 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode) // Note: no double rounding here. Reference // functions calculate in single precision. 
if (IsHalfInfinity(correctp) - || IsHalfNaN(correctp) + || isnan_fp(correctp) || IsHalfInfinity(correctn) - || IsHalfNaN(correctn) + || isnan_fp(correctn) || IsHalfInfinity(correct2p) - || IsHalfNaN(correct2p) + || isnan_fp(correct2p) || IsHalfInfinity(correct2n) - || IsHalfNaN(correct2n)) + || isnan_fp(correct2n)) continue; } diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h index a43f3a64..f735f9d3 100644 --- a/test_conformance/math_brute_force/utility.h +++ b/test_conformance/math_brute_force/utility.h @@ -19,9 +19,11 @@ #include "harness/compat.h" #include "harness/rounding_mode.h" #include "harness/fpcontrol.h" +#include "harness/mathHelpers.h" #include "harness/testHarness.h" #include "harness/ThreadPool.h" #include "harness/conversions.h" +#include "harness/parseParameters.h" #include "CL/cl_half.h" #define BUFFER_SIZE (1024 * 1024 * 2) @@ -59,7 +61,6 @@ extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT]; extern int gSkipCorrectnessTesting; extern int gForceFTZ; extern int gFastRelaxedDerived; -extern int gWimpyMode; extern int gHostFill; extern int gIsInRTZMode; extern int gHasHalf; @@ -172,16 +173,6 @@ inline int IsFloatNaN(double x) return ((u.u & 0x7fffffffU) > 0x7F800000U); } -inline bool IsHalfNaN(const cl_half v) -{ - // Extract FP16 exponent and mantissa - uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; - uint16_t h_mant = ((cl_half)v) & 0x3FF; - - // NaN test - return (h_exp == 0x1F && h_mant != 0); -} - inline bool IsHalfInfinity(const cl_half v) { // Extract FP16 exponent and mantissa diff --git a/test_conformance/mem_host_flags/main.cpp b/test_conformance/mem_host_flags/main.cpp index 0e9df56c..0b05ff16 100644 --- a/test_conformance/mem_host_flags/main.cpp +++ b/test_conformance/mem_host_flags/main.cpp @@ -31,7 +31,6 @@ int main(int argc, const char *argv[]) { - log_info("1st part, non gl-sharing objects...\n"); gTestRounding = true; return runTestHarness(argc, argv, 
test_registry::getInstance().num_tests(), test_registry::getInstance().definitions(), false, 0); diff --git a/test_conformance/non_uniform_work_group/CMakeLists.txt b/test_conformance/non_uniform_work_group/CMakeLists.txt index 7db5bcbb..6029dbbd 100644 --- a/test_conformance/non_uniform_work_group/CMakeLists.txt +++ b/test_conformance/non_uniform_work_group/CMakeLists.txt @@ -1,9 +1,5 @@ set(MODULE_NAME NON_UNIFORM_WORK_GROUP) -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - add_cxx_flag_if_supported(-Wno-narrowing) -endif() - set(${MODULE_NAME}_SOURCES main.cpp test_advanced_2d.cpp @@ -15,5 +11,3 @@ set(${MODULE_NAME}_SOURCES ) include(../CMakeCommon.txt) - -# end of file # diff --git a/test_conformance/non_uniform_work_group/test_advanced_2d.cpp b/test_conformance/non_uniform_work_group/test_advanced_2d.cpp index 9c0ed964..f764bae8 100644 --- a/test_conformance/non_uniform_work_group/test_advanced_2d.cpp +++ b/test_conformance/non_uniform_work_group/test_advanced_2d.cpp @@ -39,11 +39,13 @@ REGISTER_TEST(non_uniform_2d_basic) // non_uniform_2d_prime_number_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber, maxWgSize}; size_t localSize[] = {maxWgSize/2, 2}; @@ -52,25 +54,31 @@ REGISTER_TEST(non_uniform_2d_basic) // non_uniform_2d_two_prime_numbers_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 1759; - size_t globalSize[] = {primeNumber2, primeNumber}; - size_t localSize[] = {16, maxWgSize/16}; + size_t primeNumber = + 
PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1759; + size_t globalSize[] = { primeNumber2, primeNumber }; + size_t localSize[] = { 16, maxWgSize / 16 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_2d_prime_number_basic_2 { - int primeNumber = 1327; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t localSize[] = {maxWgSize/32, 32}; + size_t primeNumber = 1327; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t localSize[] = { maxWgSize / 32, 32 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_2d_combination_of_max_wg_size_basic @@ -83,56 +91,69 @@ REGISTER_TEST(non_uniform_2d_basic) // non_uniform_2d_two_prime_numbers_and_ls_null_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 1669; - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1669; + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + 
globalSize, localSize, Range::BASIC); } // non_uniform_2d_prime_number_and_ls_null_basic { - unsigned int primeNumber = 1249; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 1249; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_2d_four_prime_numbers_basic { - unsigned int primeNumber = 1951; - unsigned int primeNumber2 = 911; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; + size_t primeNumber = 1951; + size_t primeNumber2 = 911; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2}; + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_2d_three_prime_numbers_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; + size_t primeNumber = + 
PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } return exec.status(); @@ -160,11 +181,13 @@ REGISTER_TEST(non_uniform_2d_atomics) // non_uniform_2d_prime_number_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber, maxWgSize}; size_t localSize[] = {maxWgSize/2, 2}; @@ -173,25 +196,31 @@ REGISTER_TEST(non_uniform_2d_atomics) // non_uniform_2d_two_prime_numbers_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 1759; - size_t globalSize[] = {primeNumber2, primeNumber}; - size_t localSize[] = {16, maxWgSize/16}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1759; + size_t globalSize[] = { primeNumber2, primeNumber }; + size_t localSize[] = { 16, maxWgSize / 16 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + 
/ sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_2d_prime_number_atomics_2 { - int primeNumber = 1327; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t localSize[] = {maxWgSize/32, 32}; + size_t primeNumber = 1327; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t localSize[] = { maxWgSize / 32, 32 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_2d_combination_of_max_wg_size_atomics @@ -204,56 +233,69 @@ REGISTER_TEST(non_uniform_2d_atomics) // non_uniform_2d_two_prime_numbers_and_ls_null_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 1669; - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1669; + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_2d_prime_number_and_ls_null_atomics { - unsigned int primeNumber = 1249; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 1249; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + 
exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_2d_four_prime_numbers_atomics { - unsigned int primeNumber = 1951; - unsigned int primeNumber2 = 911; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; + size_t primeNumber = 1951; + size_t primeNumber2 = 911; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2}; + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_2d_three_prime_numbers_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + 
exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } return exec.status(); @@ -281,11 +323,13 @@ REGISTER_TEST(non_uniform_2d_barriers) // non_uniform_2d_prime_number_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber, maxWgSize}; size_t localSize[] = {maxWgSize/2, 2}; @@ -294,25 +338,31 @@ REGISTER_TEST(non_uniform_2d_barriers) // non_uniform_2d_two_prime_numbers_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 1759; - size_t globalSize[] = {primeNumber2, primeNumber}; - size_t localSize[] = {16, maxWgSize/16}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1759; + size_t globalSize[] = { primeNumber2, primeNumber }; + size_t localSize[] = { 16, maxWgSize / 16 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_2d_prime_number_barriers_2 { - int primeNumber = 1327; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t localSize[] = {maxWgSize/32, 32}; + size_t primeNumber = 1327; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t localSize[] = { maxWgSize / 32, 32 }; - 
exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_2d_combination_of_max_wg_size_barriers @@ -325,54 +375,67 @@ REGISTER_TEST(non_uniform_2d_barriers) // non_uniform_2d_two_prime_numbers_and_ls_null_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 1669; - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 1669; + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_2d_prime_number_and_ls_null_barriers { - unsigned int primeNumber = 1249; - size_t globalSize[] = {primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 1249; + size_t globalSize[] = { primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_2d_four_prime_numbers_barriers { - unsigned int primeNumber = 1951; - unsigned int primeNumber2 = 911; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult 
= PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2}; + size_t primeNumber = 1951; + size_t primeNumber2 = 911; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + size_t globalSize[] = { primeNumber, primeNumber2 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_2d_three_prime_numbers_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } return exec.status(); diff --git a/test_conformance/non_uniform_work_group/test_advanced_3d.cpp b/test_conformance/non_uniform_work_group/test_advanced_3d.cpp index a159c8d6..89a30b3f 100644 --- 
a/test_conformance/non_uniform_work_group/test_advanced_3d.cpp +++ b/test_conformance/non_uniform_work_group/test_advanced_3d.cpp @@ -39,11 +39,13 @@ REGISTER_TEST(non_uniform_3d_basic) // non_uniform_3d_prime_number_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {maxWgSize/25, primeNumber, maxWgSize/25}; size_t localSize[] = {2, std::max(maxWgSize/4,1), 2}; @@ -52,95 +54,118 @@ REGISTER_TEST(non_uniform_3d_basic) // non_uniform_3d_two_prime_numbers_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 13; - size_t globalSize[] = {primeNumber2, maxWgSize/8, primeNumber}; - size_t localSize[] = {8, 4, std::max(maxWgSize/32,1)}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 13; + size_t globalSize[] = { primeNumber2, maxWgSize / 8, primeNumber }; + size_t localSize[] = { 8, 4, std::max(maxWgSize / 32, 1) }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_3d_prime_number_basic_2 { - int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t localSize[] = {8, std::max(maxWgSize/32,1), 4}; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t 
localSize[] = { 8, std::max(maxWgSize / 32, 1), 4 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_3d_two_prime_numbers_and_ls_null_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 23; - size_t globalSize[] = {primeNumber, primeNumber2, maxWgSize/16}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 23; + size_t globalSize[] = { primeNumber, primeNumber2, maxWgSize / 16 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_3d_prime_number_and_ls_null_basic { - unsigned int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_3d_three_prime_numbers_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 10711; - unsigned int 
primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3, primeNumber3}; - size_t localSize[] = {primeNumber, 1, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 10711; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3, primeNumber3 }; + size_t localSize[] = { primeNumber, 1, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_3d_four_prime_numbers_basic { - unsigned int primeNumber = 541; - unsigned int primeNumber2 = 251; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + size_t primeNumber = 541; + size_t primeNumber2 = 251; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2, 1}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_3d_six_prime_numbers_basic { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int 
primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4,primeNumber5,primeNumber6,maxWgSize ); + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } return exec.status(); @@ -168,11 +193,13 @@ REGISTER_TEST(non_uniform_3d_atomics) // non_uniform_3d_prime_number_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {maxWgSize/25, primeNumber, maxWgSize/25}; size_t localSize[] = {2, std::max(maxWgSize/4,1), 2}; @@ -181,95 +208,118 @@ REGISTER_TEST(non_uniform_3d_atomics) // non_uniform_3d_two_prime_numbers_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 13; - size_t globalSize[] = {primeNumber2, maxWgSize/8, primeNumber}; - size_t localSize[] = {8, 4, 
std::max(maxWgSize/32,1)}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 13; + size_t globalSize[] = { primeNumber2, maxWgSize / 8, primeNumber }; + size_t localSize[] = { 8, 4, std::max(maxWgSize / 32, 1) }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_prime_number_atomics_2 { - int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t localSize[] = {8, std::max(maxWgSize/32,1), 4}; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t localSize[] = { 8, std::max(maxWgSize / 32, 1), 4 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_two_prime_numbers_and_ls_null_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 23; - size_t globalSize[] = {primeNumber, primeNumber2, maxWgSize/16}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 23; + size_t globalSize[] = { primeNumber, primeNumber2, maxWgSize / 16 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + 
exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_prime_number_and_ls_null_atomics { - unsigned int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_three_prime_numbers_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 10711; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3, primeNumber3}; - size_t localSize[] = {primeNumber, 1, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 10711; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3, primeNumber3 }; + size_t localSize[] = { primeNumber, 1, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_four_prime_numbers_atomics { - unsigned int primeNumber = 541; - unsigned int primeNumber2 = 251; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + size_t primeNumber = 541; 
+ size_t primeNumber2 = 251; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2, 1}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_3d_six_prime_numbers_atomics { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize); + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } return exec.status(); @@ -297,11 +347,13 @@ 
REGISTER_TEST(non_uniform_3d_barriers) // non_uniform_3d_prime_number_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {maxWgSize/25, primeNumber, maxWgSize/25}; size_t localSize[] = {2, std::max(maxWgSize/4,1), 2}; @@ -310,96 +362,119 @@ REGISTER_TEST(non_uniform_3d_barriers) // non_uniform_3d_two_prime_numbers_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - int primeNumber2 = 13; - size_t globalSize[] = {primeNumber2, maxWgSize/8, primeNumber}; - size_t localSize[] = {8, 4, std::max(maxWgSize/32,1)}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 13; + size_t globalSize[] = { primeNumber2, maxWgSize / 8, primeNumber }; + size_t localSize[] = { 8, 4, std::max(maxWgSize / 32, 1) }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_prime_number_barriers_2 { - int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t localSize[] = {8, std::max(maxWgSize/32,1), 4}; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t localSize[] = { 8, std::max(maxWgSize / 32, 1), 4 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), 
globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_two_prime_numbers_and_ls_null_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 23; - size_t globalSize[] = {primeNumber, primeNumber2, maxWgSize/16}; - size_t *localSize = NULL; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 23; + size_t globalSize[] = { primeNumber, primeNumber2, maxWgSize / 16 }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_prime_number_and_ls_null_barriers { - unsigned int primeNumber = 113; - size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 113; + size_t globalSize[] = { primeNumber, primeNumber, primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_three_prime_numbers_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 10711; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3, primeNumber3}; - size_t 
localSize[] = {primeNumber, 1, 1}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 10711; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3, primeNumber3 }; + size_t localSize[] = { primeNumber, 1, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_four_prime_numbers_barriers { - unsigned int primeNumber = 541; - unsigned int primeNumber2 = 251; - unsigned int primeNumber3 = 13; - unsigned int primeNumber4 = 17; - PrimeNumbers::Result2d fit2dResult; - fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + size_t primeNumber = 541; + size_t primeNumber2 = 251; + size_t primeNumber3 = 13; + size_t primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = + PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2, 1}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit2dResult.Val1, fit2dResult.Val2, 1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_3d_six_prime_numbers_barriers { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; - fit3dResult = 
PrimeNumbers::fitMaxPrime3d(primeNumber4,primeNumber5,primeNumber6,maxWgSize ); + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } return exec.status(); diff --git a/test_conformance/non_uniform_work_group/test_advanced_other.cpp b/test_conformance/non_uniform_work_group/test_advanced_other.cpp index 63df8c0c..628cc951 100644 --- a/test_conformance/non_uniform_work_group/test_advanced_other.cpp +++ b/test_conformance/non_uniform_work_group/test_advanced_other.cpp @@ -31,73 +31,88 @@ REGISTER_TEST(non_uniform_other_basic) // non_uniform_1d_two_prime_numbers_offset_basic { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {fit1dResult.Val1}; - size_t offset[] = {23}; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { fit1dResult.Val1 }; + size_t offset[] = { 23 }; - 
exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BASIC); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BASIC); } // non_uniform_2d_three_prime_numbers_offset_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; - size_t offset[] = {23, 17}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; + size_t offset[] = { 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BASIC); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BASIC); } // non_uniform_3d_six_prime_numbers_offset_basic { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + PrimeNumbers::Result3d fit3dResult; - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; - fit3dResult = 
PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t offset[] = {11, 23, 17}; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t offset[] = { 11, 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BASIC); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BASIC); } // non_uniform_3d_six_prime_numbers_rwgs_basic { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t reqdWorkGroupSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t reqdWorkGroupSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, NULL, reqdWorkGroupSize, Range::BASIC); + 
exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + NULL, reqdWorkGroupSize, Range::BASIC); } return exec.status(); @@ -117,71 +132,86 @@ REGISTER_TEST(non_uniform_other_atomics) // non_uniform_1d_two_prime_numbers_offset_atomics { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {fit1dResult.Val1}; - size_t offset[] = {23}; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { fit1dResult.Val1 }; + size_t offset[] = { 23 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::ATOMICS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::ATOMICS); } // non_uniform_2d_three_prime_numbers_offset_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; - size_t offset[] = {23, 17}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; + size_t offset[] = { 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), 
globalSize, localSize, offset, NULL, Range::ATOMICS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::ATOMICS); } // non_uniform_3d_six_prime_numbers_offset_atomics { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t offset[] = {11, 23, 17}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t offset[] = { 11, 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::ATOMICS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::ATOMICS); } // non_uniform_3d_six_prime_numbers_rwgs_atomics { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + 
PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t reqdWorkGroupSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t reqdWorkGroupSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, NULL, reqdWorkGroupSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + NULL, reqdWorkGroupSize, Range::ATOMICS); } return exec.status(); @@ -201,74 +231,89 @@ REGISTER_TEST(non_uniform_other_barriers) // non_uniform_1d_two_prime_numbers_offset_barriers { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; + size_t globalSize[] = { primeNumber }; - size_t localSize[] = {fit1dResult.Val1}; - size_t offset[] = {23}; + size_t localSize[] = { fit1dResult.Val1 }; + size_t offset[] = { 23 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BARRIERS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BARRIERS); } // 
non_uniform_2d_three_prime_numbers_offset_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } - unsigned int primeNumber2 = 42967; - unsigned int primeNumber3 = 13; - size_t globalSize[] = {primeNumber2, primeNumber3}; - size_t localSize[] = {primeNumber, 1}; - size_t offset[] = {23, 17}; + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize / 2, maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } + size_t primeNumber2 = 42967; + size_t primeNumber3 = 13; + size_t globalSize[] = { primeNumber2, primeNumber3 }; + size_t localSize[] = { primeNumber, 1 }; + size_t offset[] = { 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BARRIERS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BARRIERS); } // non_uniform_3d_six_prime_numbers_offset_barriers { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t offset[] = 
{11, 23, 17}; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t offset[] = { 11, 23, 17 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BARRIERS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + offset, NULL, Range::BARRIERS); } // non_uniform_3d_six_prime_numbers_rwgs_barriers { - unsigned int primeNumber = 373; - unsigned int primeNumber2 = 13; - unsigned int primeNumber3 = 279; - unsigned int primeNumber4 = 3; - unsigned int primeNumber5 = 5; - unsigned int primeNumber6 = 7; - PrimeNumbers::Result3d fit3dResult; + size_t primeNumber = 373; + size_t primeNumber2 = 13; + size_t primeNumber3 = 279; + size_t primeNumber4 = 3; + size_t primeNumber5 = 5; + size_t primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; - fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, + primeNumber6, maxWgSize); - size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t globalSize[] = { primeNumber, primeNumber2, primeNumber3 }; - size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; - size_t reqdWorkGroupSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t localSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; + size_t reqdWorkGroupSize[] = { fit3dResult.Val1, fit3dResult.Val2, + fit3dResult.Val3 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, NULL, reqdWorkGroupSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup( + sizeof(globalSize) / sizeof(globalSize[0]), globalSize, localSize, + NULL, reqdWorkGroupSize, Range::BARRIERS); } return exec.status(); diff --git a/test_conformance/non_uniform_work_group/test_basic.cpp 
b/test_conformance/non_uniform_work_group/test_basic.cpp index 537d7eeb..6abf0870 100644 --- a/test_conformance/non_uniform_work_group/test_basic.cpp +++ b/test_conformance/non_uniform_work_group/test_basic.cpp @@ -39,11 +39,13 @@ REGISTER_TEST(non_uniform_1d_basic) // non_uniform_1d_prime_number_basic { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t localSize[] = {maxWgSize}; @@ -52,20 +54,24 @@ REGISTER_TEST(non_uniform_1d_basic) // non_uniform_1d_max_wg_size_plus_prime_number_basic { - int primeNumber = 11; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 11; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_max_wg_size_plus_prime_number_basic_2 { - int primeNumber = 53; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 53; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_2max_wg_size_minus_1_basic @@ -78,38 +84,46 @@ REGISTER_TEST(non_uniform_1d_basic) // non_uniform_1d_prime_number_basic_2 { - unsigned int primeNumber = 
20101; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 20101; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_prime_number_basic_3 { - unsigned int primeNumber = 42967; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 42967; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_prime_number_basic_4 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_prime_number_and_ls_null_basic_2 { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t *localSize = NULL; @@ -118,25 +132,29 @@ REGISTER_TEST(non_uniform_1d_basic) // non_uniform_1d_prime_number_and_ls_null_basic_3 
{ - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } // non_uniform_1d_two_prime_numbers_basic { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {fit1dResult.Val1}; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { fit1dResult.Val1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BASIC); } return exec.status(); @@ -164,11 +182,13 @@ REGISTER_TEST(non_uniform_1d_atomics) // non_uniform_1d_prime_number_atomics { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t localSize[] = {maxWgSize}; @@ -177,20 +197,24 @@ REGISTER_TEST(non_uniform_1d_atomics) // non_uniform_1d_max_wg_size_plus_prime_number_atomics { - int primeNumber = 11; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = 
{maxWgSize}; + size_t primeNumber = 11; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_max_wg_size_plus_prime_number_atomics_2 { - int primeNumber = 53; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 53; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_2max_wg_size_minus_1_atomics @@ -203,38 +227,46 @@ REGISTER_TEST(non_uniform_1d_atomics) // non_uniform_1d_prime_number_atomics_2 { - unsigned int primeNumber = 20101; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 20101; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_prime_number_atomics_3 { - unsigned int primeNumber = 42967; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 42967; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, 
Range::ATOMICS); } // non_uniform_1d_prime_number_atomics_4 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_prime_number_and_ls_null_atomics_2 { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t *localSize = NULL; @@ -243,25 +275,29 @@ REGISTER_TEST(non_uniform_1d_atomics) // non_uniform_1d_prime_number_and_ls_null_atomics_3 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } // non_uniform_1d_two_prime_numbers_atomics { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; - size_t 
localSize[] = {fit1dResult.Val1}; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { fit1dResult.Val1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::ATOMICS); } return exec.status(); @@ -289,11 +325,13 @@ REGISTER_TEST(non_uniform_1d_barriers) // non_uniform_1d_prime_number_barriers { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + { + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t localSize[] = {maxWgSize}; @@ -302,20 +340,24 @@ REGISTER_TEST(non_uniform_1d_barriers) // non_uniform_1d_max_wg_size_plus_prime_number_barriers { - int primeNumber = 11; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 11; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_max_wg_size_plus_prime_number_barriers_2 { - int primeNumber = 53; - size_t globalSize[] = {maxWgSize+primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 53; + size_t globalSize[] = { maxWgSize + primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, 
localSize, Range::BARRIERS); } // non_uniform_1d_2max_wg_size_minus_1_barriers @@ -328,38 +370,46 @@ REGISTER_TEST(non_uniform_1d_barriers) // non_uniform_1d_prime_number_barriers_2 { - unsigned int primeNumber = 20101; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 20101; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_prime_number_barriers_3 { - unsigned int primeNumber = 42967; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 42967; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_prime_number_barriers_4 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {maxWgSize}; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { maxWgSize }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_prime_number_and_ls_null_barriers_2 { - int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); - if (primeNumber < 1) { - log_error ("Cannot find proper prime number."); - return -1; - } + size_t primeNumber = + PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2 * maxWgSize); + if (primeNumber < 1) + 
{ + log_error("Cannot find proper prime number."); + return -1; + } size_t globalSize[] = {primeNumber}; size_t *localSize = NULL; @@ -368,26 +418,30 @@ REGISTER_TEST(non_uniform_1d_barriers) // non_uniform_1d_prime_number_and_ls_null_barriers_3 { - unsigned int primeNumber = 65521; - size_t globalSize[] = {primeNumber}; - size_t *localSize = NULL; + size_t primeNumber = 65521; + size_t globalSize[] = { primeNumber }; + size_t *localSize = NULL; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } // non_uniform_1d_two_prime_numbers_barriers { - unsigned int primeNumber = 42967; - unsigned int primeNumber2 = 113; + size_t primeNumber = 42967; + size_t primeNumber2 = 113; - PrimeNumbers::Result1d fit1dResult; + PrimeNumbers::Result1d fit1dResult; - fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize); - size_t globalSize[] = {primeNumber}; - size_t localSize[] = {fit1dResult.Val1}; + size_t globalSize[] = { primeNumber }; + size_t localSize[] = { fit1dResult.Val1 }; - exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + exec.runTestNonUniformWorkGroup(sizeof(globalSize) + / sizeof(globalSize[0]), + globalSize, localSize, Range::BARRIERS); } return exec.status(); diff --git a/test_conformance/non_uniform_work_group/tools.cpp b/test_conformance/non_uniform_work_group/tools.cpp index 9c0f8f6d..7cbd3500 100644 --- a/test_conformance/non_uniform_work_group/tools.cpp +++ b/test_conformance/non_uniform_work_group/tools.cpp @@ -46,28 +46,31 @@ void PrimeNumbers::generatePrimeNumbers (unsigned int maxValue) { } // Returns prime number for specified range -int PrimeNumbers::getPrimeNumberInRange (size_t lowerValue, size_t higherValue) { - 
if(lowerValue >= higherValue) - return -1; +size_t PrimeNumbers::getPrimeNumberInRange(size_t lowerValue, + size_t higherValue) +{ + if (lowerValue >= higherValue) return 0; - if(primeNumbers.back() < lowerValue) - return -2; + if (primeNumbers.back() < lowerValue) return 0; - PrimeNumbersCollection::iterator it = primeNumbers.begin(); + PrimeNumbersCollection::iterator it = primeNumbers.begin(); - for (; it != primeNumbers.end(); ++it) { - if (lowerValue<*it) { - if(higherValue>*it) - return *it; - else - return -3; + for (; it != primeNumbers.end(); ++it) + { + if (lowerValue < *it) + { + if (higherValue > *it) + return *it; + else + return 0; + } } - } - return -1; + return 0; } -int PrimeNumbers::getNextLowerPrimeNumber(size_t upperValue) { +size_t PrimeNumbers::getNextLowerPrimeNumber(size_t upperValue) +{ size_t retVal = 1; PrimeNumbersCollection::iterator it = primeNumbers.begin(); diff --git a/test_conformance/non_uniform_work_group/tools.h b/test_conformance/non_uniform_work_group/tools.h index 8e235c3a..29d31d39 100644 --- a/test_conformance/non_uniform_work_group/tools.h +++ b/test_conformance/non_uniform_work_group/tools.h @@ -23,8 +23,7 @@ #include #include -typedef std::vector PrimeNumbersCollection; - +typedef std::vector PrimeNumbersCollection; // Class responsible for distributing prime numbers @@ -47,8 +46,8 @@ public: }; static void generatePrimeNumbers (unsigned int maxValue); - static int getPrimeNumberInRange (size_t lowerValue, size_t higherValue); - static int getNextLowerPrimeNumber (size_t upperValue); + static size_t getPrimeNumberInRange(size_t lowerValue, size_t higherValue); + static size_t getNextLowerPrimeNumber(size_t upperValue); static Result1d fitMaxPrime1d(size_t Val1, size_t productMax); // Return val1 and Val2 which are largest prime numbers who's product is <= productMax static Result2d fitMaxPrime2d(size_t Val1, size_t Val2, size_t productMax); diff --git a/test_conformance/printf/test_printf.cpp 
b/test_conformance/printf/test_printf.cpp index 38333175..9cd9db00 100644 --- a/test_conformance/printf/test_printf.cpp +++ b/test_conformance/printf/test_printf.cpp @@ -1101,20 +1101,19 @@ int main(int argc, const char* argv[]) argCount, argList, test_registry::getInstance().num_tests(), test_registry::getInstance().definitions(), true, 0, InitCL); - if(gQueue) + if (gQueue) { int error = clFinish(gQueue); - if (error) { + if (error) + { log_error("clFinish failed: %d\n", error); } + if (clReleaseCommandQueue(gQueue) != CL_SUCCESS) + log_error("clReleaseCommandQueue\n"); } - - if(clReleaseCommandQueue(gQueue)!=CL_SUCCESS) - log_error("clReleaseCommandQueue\n"); - if(clReleaseContext(gContext)!= CL_SUCCESS) + if (gContext && clReleaseContext(gContext) != CL_SUCCESS) log_error("clReleaseContext\n"); - free(argList); remove(gFileName); return err; diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp index f982f15e..83a21fbb 100644 --- a/test_conformance/printf/util_printf.cpp +++ b/test_conformance/printf/util_printf.cpp @@ -1352,7 +1352,7 @@ std::vector correctBufferVectorRTZ = { "1.23e+03,9.87e+05,4.99e-04", - "0x1p-2,0x1p-1,0x1p+0,0x1.8p+0", + "0x1.0p-2,0x1.0p-1,0x1.0p+0,0x1.8p+0", "1,2,3,4,1.5,3.13999,2.5,3.5", diff --git a/test_conformance/profiling/execute_multipass.cpp b/test_conformance/profiling/execute_multipass.cpp index d3532ceb..7a711e6f 100644 --- a/test_conformance/profiling/execute_multipass.cpp +++ b/test_conformance/profiling/execute_multipass.cpp @@ -24,6 +24,7 @@ #include "procs.h" #include "harness/testHarness.h" +#include "harness/typeWrappers.h" #include "harness/errorHelpers.h" static const char *read3d_kernel_code = @@ -90,11 +91,11 @@ static int verifyImages( cl_uchar *ptr0, cl_uchar *ptr1, cl_uchar tolerance, int static int run_kernel( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int d, int nChannels, cl_uchar *inptr, cl_uchar *outptr ) { - cl_program program[1]; - 
cl_kernel kernel[1]; - cl_mem memobjs[2]; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper memobjs[2]; cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 }; - cl_event executeEvent = NULL; + clEventWrapper executeEvent = NULL; cl_ulong queueStart, submitStart, writeStart, writeEnd; size_t threads[3]; size_t localThreads[3]; @@ -106,20 +107,34 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue threads[1] = h; threads[2] = d; + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); + test_error(err, "clCreateSampler failed"); + + // allocate the input and output image memory objects + memobjs[0] = + create_image_3d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &image_format_desc, w, h, d, 0, 0, inptr, &err); + test_error(err, "unable to create 3D image using create_image_3d"); + + // allocate an array memory object to load the filter weights + size_t outptr_size = sizeof(cl_uchar) * w * h * d * nChannels; + memobjs[1] = + clCreateBuffer(context, CL_MEM_READ_WRITE, outptr_size, NULL, &err); + test_error(err, "unable to create array using clCreateBuffer"); + + // create the compute program + err = create_single_kernel_helper(context, &program, &kernel, 1, + &read3d_kernel_code, "read3d"); + test_error(err, "create_single_kernel_helper failed"); + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t), (size_t *)localThreads, NULL); - if (err) - { - log_error("clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed\n"); - return -1; - } - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), - &maxWorkgroupSize, NULL); - if (err) - { - log_error("clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed\n"); - return -1; - } + test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed"); + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(size_t), 
&maxWorkgroupSize, NULL); + test_error(err, "clGetDeviceInfo(CL_KERNEL_WORK_GROUP_SIZE) failed\n"); + localThreads[0] = std::min({ localThreads[0], threads[0], maxWorkgroupSize }); localThreads[1] = std::min( @@ -128,121 +143,44 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue std::min({ localThreads[2], threads[2], maxWorkgroupSize / (localThreads[0] * localThreads[1]) }); - cl_sampler sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err ); - if( err ){ - log_error( " clCreateSampler failed.\n" ); - return -1; - } - - // allocate the input and output image memory objects - memobjs[0] = - create_image_3d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - &image_format_desc, w, h, d, 0, 0, inptr, &err); - if( memobjs[0] == (cl_mem)0 ){ - log_error( " unable to create 2D image using create_image_2d\n" ); - return -1; - } - - // allocate an array memory object to load the filter weights - size_t outptr_size = sizeof(cl_uchar) * w * h * d * nChannels; - memobjs[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, outptr_size, NULL, &err); - if( memobjs[1] == (cl_mem)0 ){ - log_error( " unable to create array using clCreateBuffer\n" ); - clReleaseMemObject( memobjs[0] ); - return -1; - } - - // create the compute program - err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &read3d_kernel_code, "read3d" ); - if( err ){ - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } - - // create kernel args object and set arg values. 
// set the args values - err |= clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[0] ); - err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[1] ); - err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler); + err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]); + err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobjs[1]); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + test_error(err, "clSetKernelArg failed"); - if( err != CL_SUCCESS ){ - print_error( err, "clSetKernelArg failed\n" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } - - err = clEnqueueNDRangeKernel( queue, kernel[0], 3, NULL, threads, localThreads, 0, NULL, &executeEvent ); - - if( err != CL_SUCCESS ){ - print_error( err, "clEnqueueNDRangeKernel failed\n" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, localThreads, + 0, NULL, &executeEvent); + test_error(err, "clEnqueueNDRangeKernel failed"); if (executeEvent) { // This synchronization point is needed in order to assume the data is valid. // Getting profiling information is not a synchronization point. 
err = clWaitForEvents( 1, &executeEvent ); - if( err != CL_SUCCESS ) - { - print_error( err, "clWaitForEvents failed\n" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + test_error(err, "clWaitForEvents failed"); // test profiling - while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE ); - if( err != CL_SUCCESS ){ - print_error( err, "clGetEventProfilingInfo failed" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + while ((err = clGetEventProfilingInfo( + executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), + &queueStart, NULL)) + == CL_PROFILING_INFO_NOT_AVAILABLE) + ; + test_error(err, "clGetEventProfilingInfo failed"); - while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE ); - if( err != CL_SUCCESS ){ - print_error( err, "clGetEventProfilingInfo failed" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + while ((err = clGetEventProfilingInfo( + executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), + &submitStart, NULL)) + == CL_PROFILING_INFO_NOT_AVAILABLE) + ; + test_error(err, "clGetEventProfilingInfo failed"); err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL ); - if( err != CL_SUCCESS ){ - print_error( err, "clGetEventProfilingInfo failed" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + test_error(err, "clGetEventProfilingInfo failed"); err 
= clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL ); - if( err != CL_SUCCESS ){ - print_error( err, "clGetEventProfilingInfo failed" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } + test_error(err, "clGetEventProfilingInfo failed"); log_info( "Profiling info:\n" ); log_info( "Time from queue to start of clEnqueueNDRangeKernel: %f seconds\n", (double)(writeStart - queueStart) / 1000000000000.f ); @@ -252,23 +190,9 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue // read output image err = clEnqueueReadBuffer(queue, memobjs[1], CL_TRUE, 0, outptr_size, outptr, 0, NULL, NULL); - if( err != CL_SUCCESS ){ - print_error( err, "clReadImage failed\n" ); - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); - return -1; - } - - // release kernel, program, and memory objects - clReleaseKernel( kernel[0] ); - clReleaseProgram( program[0] ); - clReleaseMemObject( memobjs[1] ); - clReleaseMemObject( memobjs[0] ); + test_error(err, "clReadImage failed"); return err; - } // end run_kernel() diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp index 66ab0729..79de562a 100644 --- a/test_conformance/relationals/test_comparisons_fp.cpp +++ b/test_conformance/relationals/test_comparisons_fp.cpp @@ -22,6 +22,7 @@ #include #include +#include "harness/mathHelpers.h" #include "harness/stringHelpers.h" #include @@ -368,9 +369,8 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, { if (gInfNanSupport == 0) { - float a = inDataA[i * vecSize + j]; - float b = inDataB[i * vecSize + j]; - if (isnan(a) || isnan(b)) + if (isnan_fp(inDataA[i * vecSize + j]) + || isnan_fp(inDataB[i * vecSize + j])) fail = 0; else fail = 1; diff --git 
a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 20f5bd5e..bec85e12 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -89,7 +89,6 @@ static void printUsage( void ); // test their entire range and 64 bits test will test the 32 bit // range. Otherwise, we test a subset of the range // [-min_short, min_short] -static bool s_wimpy_mode = false; static int s_wimpy_reduction_factor = 256; //----------------------------------------- @@ -141,8 +140,9 @@ static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, break; } case 4: { - if (!s_wimpy_mode) { - uint32_t* ui = (uint32_t *)cmp; + if (!gWimpyMode) + { + uint32_t *ui = (uint32_t *)cmp; for (size_t i = 0; i < count; ++i) ui[i] = (uint32_t)start++; } else { @@ -323,7 +323,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; const size_t block_elements = BUFFER_SIZE / type_size[stype]; - size_t step = s_wimpy_mode ? s_wimpy_reduction_factor : 1; + size_t step = gWimpyMode ? 
s_wimpy_reduction_factor : 1; cl_ulong cmp_stride = block_elements * step; // It is more efficient to create the tests all at once since we @@ -474,7 +474,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c } // for vecsize } // for i - if (!s_wimpy_mode) + if (!gWimpyMode) log_info(" Passed\n\n"); else log_info(" Wimpy Passed\n\n"); @@ -603,13 +603,9 @@ int main(int argc, const char* argv[]) arg++; while(*arg != '\0') { - switch(*arg) { - case 'h': - printUsage(); - return 0; - case 'w': - s_wimpy_mode = true; - break; + switch (*arg) + { + case 'h': printUsage(); return 0; case '[': parseWimpyReductionFactor(arg, s_wimpy_reduction_factor); break; @@ -626,11 +622,8 @@ int main(int argc, const char* argv[]) } } - if (getenv("CL_WIMPY_MODE")) { - s_wimpy_mode = true; - } - - if (s_wimpy_mode) { + if (gWimpyMode && !gListTests) + { log_info("\n"); log_info("*** WARNING: Testing in Wimpy mode! ***\n"); log_info("*** Wimpy mode is not sufficient to verify correctness. ***\n"); @@ -651,7 +644,6 @@ static void printUsage( void ) { log_info("test_select: [-w] \n"); log_info("\tdefault is to run the full test on the default device\n"); - log_info("\t-w run in wimpy mode (smoke test)\n"); log_info("\t-[2^n] Set wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", s_wimpy_reduction_factor); log_info("\n"); log_info("Test names:\n"); diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp index a685b7f6..71653a87 100644 --- a/test_conformance/select/util_select.cpp +++ b/test_conformance/select/util_select.cpp @@ -14,6 +14,8 @@ // limitations under the License. 
// #include "harness/errorHelpers.h" +#include "harness/mathHelpers.h" +#include "harness/testHarness.h" #include #include @@ -834,9 +836,9 @@ size_t check_half(const void *const test, const void *const correct, // Allow nans to be binary different for (i = 0; i < count; i++) { - float fcorrect = cl_half_to_float(c[i]); - float ftest = cl_half_to_float(t[i]); - if ((t[i] != c[i]) && !(isnan(fcorrect) && isnan(ftest))) + if ((t[i] != c[i]) + && !(isnan_fp(cl_half_to_float(c[i])) + && isnan_fp(cl_half_to_float(t[i])))) { log_error("\n(check_half) Error for vector size %zu found at " "0x%8.8zx (of 0x%8.8zx): " diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp index 77754a2f..4e7c0ec5 100644 --- a/test_conformance/spir/main.cpp +++ b/test_conformance/spir/main.cpp @@ -6713,6 +6713,14 @@ cl_device_id get_platform_device (cl_device_type device_type, cl_uint choosen_de return devices[choosen_device_index]; } +static void ListTests() +{ + for (unsigned int i = 0; i < (sizeof(spir_suites) / sizeof(sub_suite)); i++) + { + log_info("\t%s\n", spir_suites[i].name); + } +} + /** Parses the command line parameters and set the @@ -6761,7 +6769,7 @@ static int ParseCommandLine (int argc, const char *argv[], /* Process the command line arguments */ /* Special case: just list the tests */ - if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" ))) + if ((argc > 1) && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) { log_info( "Usage: %s [] [pid] [id] [] [w32] [no-unzip]\n", argv[0] ); log_info( "\t\tOne or more of: (default all)\n"); @@ -6771,10 +6779,13 @@ static int ParseCommandLine (int argc, const char *argv[], log_info( "\tw32\t\tIndicates device address bits is 32.\n" ); log_info( "\tno-unzip\t\tDo not extract test files from Zip; use existing.\n" ); - for( unsigned int i = 0; i < (sizeof(spir_suites) / sizeof(sub_suite)); i++ ) - { - log_info( "\t\t%s\n", spir_suites[i].name ); - } + 
ListTests(); + return 0; + } + else if ((argc > 1) + && (!strcmp(argv[1], "--list") || !strcmp(argv[1], "-list"))) + { + ListTests(); return 0; } @@ -6911,12 +6922,13 @@ int main (int argc, const char* argv[]) cl_device_id device = get_platform_device(device_type, choosen_device_index, choosen_platform_index); printDeviceHeader(device); + REQUIRE_EXTENSION("cl_khr_spir"); + std::vector versions; get_spir_version(device, versions); - if (!is_extension_available(device, "cl_khr_spir") - || (std::find(versions.begin(), versions.end(), Version{ 1, 2 }) - == versions.end())) + if (std::find(versions.begin(), versions.end(), Version{ 1, 2 }) + == versions.end()) { log_info("Spir extension version 1.2 is not supported by the device\n"); return 0; diff --git a/test_conformance/spirv_new/CMakeLists.txt b/test_conformance/spirv_new/CMakeLists.txt index 13a258e5..e32d600b 100644 --- a/test_conformance/spirv_new/CMakeLists.txt +++ b/test_conformance/spirv_new/CMakeLists.txt @@ -38,10 +38,6 @@ set(TEST_HARNESS_SOURCES ../../test_conformance/math_brute_force/utility.cpp ) -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - add_cxx_flag_if_supported(-Wno-narrowing) -endif() - set(${MODULE_NAME}_SOURCES ${${MODULE_NAME}_SOURCES} ${TEST_HARNESS_SOURCES}) include(../CMakeCommon.txt) diff --git a/test_conformance/spirv_new/main.cpp b/test_conformance/spirv_new/main.cpp index 98ce18e8..b5421f92 100644 --- a/test_conformance/spirv_new/main.cpp +++ b/test_conformance/spirv_new/main.cpp @@ -216,6 +216,7 @@ int main(int argc, const char *argv[]) { gReSeed = 1; bool modifiedSpvBinariesPath = false; + bool listTests = false; for (int i = 0; i < argc; ++i) { int argsRemoveNum = 0; if (argv[i] == spvBinariesPathArg) { @@ -241,9 +242,12 @@ int main(int argc, const char *argv[]) argc -= argsRemoveNum; --i; } + listTests |= (argv[i] == std::string("--list") + || argv[i] == std::string("-list")); } - if (modifiedSpvBinariesPath == false) { - printUsage(); + if 
(modifiedSpvBinariesPath == false && !listTests) + { + printUsage(); } return runTestHarnessWithCheck( diff --git a/test_conformance/spirv_new/testBase.h b/test_conformance/spirv_new/testBase.h index 54fe15bd..5ea415a9 100644 --- a/test_conformance/spirv_new/testBase.h +++ b/test_conformance/spirv_new/testBase.h @@ -20,6 +20,7 @@ #define _testBase_h #include "harness/compat.h" +#include "harness/mathHelpers.h" #include "harness/rounding_mode.h" #include diff --git a/test_conformance/spirv_new/test_decorate.cpp b/test_conformance/spirv_new/test_decorate.cpp index fc9fc522..f9380611 100644 --- a/test_conformance/spirv_new/test_decorate.cpp +++ b/test_conformance/spirv_new/test_decorate.cpp @@ -231,7 +231,7 @@ static inline f = cl_half_to_float(cl_half_from_float(f, half_rounding)); To val = static_cast(std::min(std::max(f, loVal), hiVal)); - if (isnan(cl_half_to_float(rhs))) + if (isnan_fp(rhs)) { val = 0; } diff --git a/test_conformance/spirv_new/test_spirv_14.cpp b/test_conformance/spirv_new/test_spirv_14.cpp index 6fc5e864..01df8dfd 100644 --- a/test_conformance/spirv_new/test_spirv_14.cpp +++ b/test_conformance/spirv_new/test_spirv_14.cpp @@ -26,10 +26,10 @@ static int test_image_operand_helper(cl_device_id deviceID, cl_context context, { const char* filename = signExtend ? "spv1.4/image_operand_signextend" : "spv1.4/image_operand_zeroextend"; - cl_image_format image_format = { - CL_RGBA, - signExtend ? CL_SIGNED_INT8 : CL_UNSIGNED_INT8, - }; + cl_image_format image_format = {}; + image_format.image_channel_order = CL_RGBA; + image_format.image_channel_data_type = + signExtend ? 
CL_SIGNED_INT8 : CL_UNSIGNED_INT8; cl_int error = CL_SUCCESS; diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index ab8ee797..f234eef4 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1609,6 +1609,7 @@ template struct subgroup_test // Generate the desired input for the kernel test_params.subgroup_size = subgroup_size; + test_params.local_workgroup_size = local; Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params); test_status status = TEST_FAIL; diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 69976c50..04f90e17 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -896,8 +896,21 @@ REGISTER_TEST(subgroup_functions_ballot) return TEST_SKIPPED_ITSELF; } - constexpr size_t global_work_size = 170; - constexpr size_t local_work_size = 64; + int error = 0; + + // Non-uniform work-groups are an optional feature from 3.0 onward. + cl_bool device_supports_non_uniform_wg = CL_TRUE; + if (get_device_cl_version(device) >= Version(3, 0)) + { + error = clGetDeviceInfo( + device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), + &device_supports_non_uniform_wg, nullptr); + test_error(error, "clGetDeviceInfo failed"); + } + + const size_t global_work_size = device_supports_non_uniform_wg ? 
170 : 192; + const size_t local_work_size = 64; + WorkGroupParams test_params(global_work_size, local_work_size); test_params.save_kernel_source(sub_group_ballot_mask_source); test_params.save_kernel_source(sub_group_non_uniform_broadcast_source, @@ -907,7 +920,7 @@ REGISTER_TEST(subgroup_functions_ballot) RunTestForType rft(device, context, queue, num_elements, test_params); // non uniform broadcast functions - int error = run_non_uniform_broadcast_for_type(rft); + error |= run_non_uniform_broadcast_for_type(rft); error |= run_non_uniform_broadcast_for_type(rft); error |= run_non_uniform_broadcast_for_type(rft); error |= run_non_uniform_broadcast_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index 74e9144e..b37c1db9 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -261,15 +261,28 @@ REGISTER_TEST(subgroup_functions_non_uniform_vote) return TEST_SKIPPED_ITSELF; } - constexpr size_t global_work_size = 170; - constexpr size_t local_work_size = 64; + int error = 0; + + // Non-uniform work-groups are an optional feature from 3.0 onward. + cl_bool device_supports_non_uniform_wg = CL_TRUE; + if (get_device_cl_version(device) >= Version(3, 0)) + { + error = clGetDeviceInfo( + device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), + &device_supports_non_uniform_wg, nullptr); + test_error(error, "clGetDeviceInfo failed"); + } + + const size_t global_work_size = device_supports_non_uniform_wg ? 
170 : 192; + const size_t local_work_size = 64; + WorkGroupParams test_params(global_work_size, local_work_size, 3); test_params.save_kernel_source( sub_group_non_uniform_any_all_all_equal_source); test_params.save_kernel_source(sub_group_elect_source, "sub_group_elect"); RunTestForType rft(device, context, queue, num_elements, test_params); - int error = run_vote_all_equal_for_type(rft); + error |= run_vote_all_equal_for_type(rft); error |= run_vote_all_equal_for_type(rft); error |= run_vote_all_equal_for_type(rft); error |= run_vote_all_equal_for_type(rft); diff --git a/test_conformance/thread_dimensions/test_thread_dimensions.cpp b/test_conformance/thread_dimensions/test_thread_dimensions.cpp index ed163ac7..4dd8c7ae 100644 --- a/test_conformance/thread_dimensions/test_thread_dimensions.cpp +++ b/test_conformance/thread_dimensions/test_thread_dimensions.cpp @@ -19,6 +19,7 @@ #include "harness/errorHelpers.h" #include "harness/conversions.h" #include "harness/mt19937.h" +#include "harness/parseParameters.h" #include #include @@ -524,9 +525,9 @@ int test_thread_dimensions(cl_device_id device, cl_context context, cl_uint max_x_size = 1, min_x_size = 1, max_y_size = 1, min_y_size = 1, max_z_size = 1, min_z_size = 1; - if (getenv("CL_WIMPY_MODE") && !quick_test) + if (gWimpyMode && !quick_test) { - log_info("CL_WIMPY_MODE enabled, skipping test\n"); + log_info("Wimpy mode enabled, skipping test\n"); return 0; } diff --git a/test_conformance/vectors/defines.h b/test_conformance/vectors/defines.h index 0ea0b00d..7068cf18 100644 --- a/test_conformance/vectors/defines.h +++ b/test_conformance/vectors/defines.h @@ -26,7 +26,6 @@ extern int g_arrVecSizes[NUM_VECTOR_SIZES]; extern int g_arrVecSteps[NUM_VECTOR_SIZES]; -extern bool g_wimpyMode; extern const char *g_arrVecSizeNames[NUM_VECTOR_SIZES]; extern size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES]; diff --git a/test_conformance/vectors/globals.cpp b/test_conformance/vectors/globals.cpp index 6dee6d96..b7da9b6e 100644 --- 
a/test_conformance/vectors/globals.cpp +++ b/test_conformance/vectors/globals.cpp @@ -32,8 +32,6 @@ size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES] = { (size_t)0xf // 16 }; -bool g_wimpyMode = false; - ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes diff --git a/test_conformance/vectors/structs.cpp b/test_conformance/vectors/structs.cpp index 0098e414..c0757017 100644 --- a/test_conformance/vectors/structs.cpp +++ b/test_conformance/vectors/structs.cpp @@ -18,6 +18,8 @@ #include "defines.h" +#include "harness/parseParameters.h" + #define DEBUG_MEM_ALLOC 0 /** typedef struct _bufferStruct @@ -223,7 +225,7 @@ void initContents(bufferStruct *pBufferStruct, clState *pClState, break; } case 4: { - if (!g_wimpyMode) + if (!gWimpyMode) { uint32_t *ui = (uint32_t *)(pBufferStruct->m_pIn); for (i = 0; i < countIn; ++i) diff --git a/test_conformance/vulkan/CMakeLists.txt b/test_conformance/vulkan/CMakeLists.txt index e658b31c..c057e84c 100644 --- a/test_conformance/vulkan/CMakeLists.txt +++ b/test_conformance/vulkan/CMakeLists.txt @@ -2,7 +2,6 @@ set (MODULE_NAME VULKAN) list(APPEND CLConform_LIBRARIES vulkan_wrapper) set(CMAKE_COMPILE_WARNING_AS_ERROR OFF) -set(CMAKE_CXX_FLAGS "-fpermissive") if(WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVK_USE_PLATFORM_WIN32_KHR") endif(WIN32) diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp index aa32dd3a..85c1c7db 100644 --- a/test_conformance/vulkan/main.cpp +++ b/test_conformance/vulkan/main.cpp @@ -50,7 +50,7 @@ static void printUsage(const char *execName) log_info("Usage: %s [test_names] [options]\n", execName); log_info("Test names:\n"); - for (int i = 0; i < test_registry::getInstance().num_tests(); i++) + for (unsigned int i = 0; i < test_registry::getInstance().num_tests(); i++) { log_info("\t%s\n", test_registry::getInstance().definitions()[i].name); } diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp 
b/test_conformance/vulkan/test_vulkan_api_consistency.cpp index 7410cc7f..bd19987b 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "harness/testHarness.h" #include "harness/typeWrappers.h" #include "harness/deviceInfo.h" @@ -84,8 +85,8 @@ struct ConsistencyExternalBufferTest : public VulkanTestBase vkDeviceMem->bindBuffer(vkBufferList[0], 0); - void* handle = NULL; - int fd; + [[maybe_unused]] void* handle = NULL; + [[maybe_unused]] int fd; std::vector extMemProperties{ (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, @@ -236,15 +237,15 @@ struct ConsistencyExternalImageTest : public VulkanTestBase log_info("Memory type index: %u\n", (uint32_t)memoryTypeList[0]); log_info("Memory type property: %d\n", memoryTypeList[0].getMemoryTypeProperty()); - log_info("Image size : %ld\n", vkImage2D.getSize()); + log_info("Image size : %" PRIu64 "\n", vkImage2D.getSize()); VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(*vkDevice, vkImage2D, memoryTypeList[0], vkExternalMemoryHandleType); vkDeviceMem->bindImage(vkImage2D, 0); - void* handle = NULL; - int fd; + [[maybe_unused]] void* handle = NULL; + [[maybe_unused]] int fd; std::vector extMemProperties{ (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, (cl_mem_properties)device, @@ -386,9 +387,9 @@ struct ConsistencyExternalSemaphoreTest : public VulkanTestBase VulkanSemaphore vkCl2Vksemaphore(*vkDevice, semaphoreHandleType); cl_semaphore_khr clCl2Vksemaphore; cl_semaphore_khr clVk2Clsemaphore; - void* handle1 = NULL; - void* handle2 = NULL; - int fd1, fd2; + [[maybe_unused]] void* handle1 = NULL; + [[maybe_unused]] void* handle2 = NULL; + [[maybe_unused]] int fd1, fd2; std::vector sema_props1{ (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, diff --git 
a/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp b/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp index b6797f0c..c979fb19 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "harness/testHarness.h" #include "harness/typeWrappers.h" #include "harness/deviceInfo.h" @@ -94,15 +95,15 @@ struct ConsistencyExternalImage1DTest : public VulkanTestBase log_info("Memory type index: %u\n", (uint32_t)memoryTypeList[0]); log_info("Memory type property: %d\n", memoryTypeList[0].getMemoryTypeProperty()); - log_info("Image size : %lu\n", vkImage1D.getSize()); + log_info("Image size : %" PRIu64 "\n", vkImage1D.getSize()); VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(*vkDevice, vkImage1D, memoryTypeList[0], vkExternalMemoryHandleType); vkDeviceMem->bindImage(vkImage1D, 0); - void* handle = NULL; - int fd; + [[maybe_unused]] void* handle = NULL; + [[maybe_unused]] int fd; std::vector extMemProperties{ (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, (cl_mem_properties)device, diff --git a/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp b/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp index e84954e9..a21eea7b 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "harness/testHarness.h" #include "harness/typeWrappers.h" #include "harness/deviceInfo.h" @@ -96,15 +97,15 @@ struct ConsistencyExternalImage3DTest : public VulkanTestBase log_info("Memory type index: %u\n", (uint32_t)memoryTypeList[0]); log_info("Memory type property: %d\n", memoryTypeList[0].getMemoryTypeProperty()); - log_info("Image size : %lu\n", vkImage3D.getSize()); + log_info("Image size : 
%" PRIu64 "\n", vkImage3D.getSize()); VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(*vkDevice, vkImage3D, memoryTypeList[0], vkExternalMemoryHandleType); vkDeviceMem->bindImage(vkImage3D, 0); - void* handle = NULL; - int fd; + [[maybe_unused]] void* handle = NULL; + [[maybe_unused]] int fd; std::vector extMemProperties{ (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, (cl_mem_properties)device, diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index 5f7c7f48..e39641f1 100644 --- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -34,7 +34,6 @@ namespace { -cl_uchar uuid[CL_UUID_SIZE_KHR]; cl_device_id deviceId = nullptr; struct Params @@ -86,10 +85,10 @@ const char *kernel_text_verify = " \ int run_test_with_two_queue( - cl_context &context, cl_command_queue &cmd_queue1, - cl_command_queue &cmd_queue2, cl_kernel *kernel, cl_kernel &verify_kernel, - VulkanDevice &vkDevice, uint32_t numBuffers, uint32_t bufferSize, - bool use_fence, + cl_context context, cl_command_queue cmd_queue1, + cl_command_queue cmd_queue2, clKernelWrapper *kernel, + cl_kernel verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, + uint32_t bufferSize, bool use_fence, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType) { int err = CL_SUCCESS; @@ -211,7 +210,6 @@ int run_test_with_two_queue( vkDescriptorSet.update(0, vkParamsBuffer); for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) { - size_t buffer_size = vkBufferList[bIdx].getSize(); vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); @@ -274,7 +272,7 @@ int run_test_with_two_queue( err |= clSetKernelArg(kernel_cq, 1, sizeof(cl_mem), (void *)&(buffers[0])); - for (int i = 0; i < vkBufferList.size() - 1; i++) + for (size_t i = 0; i < vkBufferList.size() - 1; i++) { err |= 
clSetKernelArg(update_buffer_kernel, i + 1, @@ -354,7 +352,7 @@ int run_test_with_two_queue( "Error: Failed read output, error\n"); int calc_max_iter; - for (int i = 0; i < vkBufferList.size(); i++) + for (size_t i = 0; i < vkBufferList.size(); i++) { if (i == 0) calc_max_iter = (maxIter * 3); @@ -430,8 +428,8 @@ CLEANUP: } int run_test_with_one_queue( - cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, - cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, + cl_context context, cl_command_queue cmd_queue1, clKernelWrapper *kernel, + cl_kernel verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, uint32_t bufferSize, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType, bool use_fence) @@ -545,7 +543,6 @@ int run_test_with_one_queue( vkDescriptorSet.update(0, vkParamsBuffer); for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) { - size_t buffer_size = vkBufferList[bIdx].getSize(); vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); @@ -605,7 +602,7 @@ int run_test_with_one_queue( err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), (void *)&bufferSize); - for (int i = 0; i < vkBufferList.size(); i++) + for (size_t i = 0; i < vkBufferList.size(); i++) { err |= clSetKernelArg(update_buffer_kernel, i + 1, @@ -665,7 +662,7 @@ int run_test_with_one_queue( "Error: clEnqueueWriteBuffer \n"); int calc_max_iter = (maxIter * 2); - for (int i = 0; i < vkBufferList.size(); i++) + for (size_t i = 0; i < vkBufferList.size(); i++) { err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), (void *)&(buffers[i])); @@ -735,8 +732,8 @@ CLEANUP: } int run_test_with_multi_import_same_ctx( - cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, - cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, + cl_context context, cl_command_queue cmd_queue1, clKernelWrapper *kernel, + cl_kernel verify_kernel, 
VulkanDevice &vkDevice, uint32_t numBuffers, uint32_t bufferSize, bool use_fence, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType) { @@ -839,7 +836,7 @@ int run_test_with_multi_import_same_ctx( vkExternalMemoryHandleType)); std::vector pExternalMemory; - for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + for (int cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) { pExternalMemory.push_back( new clExternalMemory(vkBufferListDeviceMemory[bIdx], @@ -858,10 +855,9 @@ int run_test_with_multi_import_same_ctx( vkDescriptorSet.update(0, vkParamsBuffer); for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) { - size_t buffer_size = vkBufferList[bIdx].getSize(); vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); - for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + for (int cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) { buffers[bIdx][cl_bIdx] = externalMemory[bIdx][cl_bIdx] ->getExternalMemoryBuffer(); @@ -920,7 +916,7 @@ int run_test_with_multi_import_same_ctx( { err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), (void *)&bufferSize); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err |= clSetKernelArg( update_buffer_kernel, i + 1, sizeof(cl_mem), @@ -943,7 +939,7 @@ int run_test_with_multi_import_same_ctx( "Error: Failed to launch " "update_buffer_kernel, error\n "); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clEnqueueReleaseExternalMemObjectsKHRptr( cmd_queue1, 1, &buffers[i][launchIter], 0, nullptr, @@ -983,7 +979,7 @@ int run_test_with_multi_import_same_ctx( calc_max_iter = maxIter * (numImports + 1); - for (int i = 0; i < vkBufferList.size(); i++) + for (size_t i = 0; i < vkBufferList.size(); i++) { err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), (void *)&(buffers[i][0])); @@ -1018,7 +1014,7 @@ int run_test_with_multi_import_same_ctx( } for (size_t i = 0; i < vkBufferList.size(); i++) { - for (size_t j = 0; j < 
numImports; j++) + for (int j = 0; j < numImports; j++) { delete externalMemory[i][j]; } @@ -1068,10 +1064,11 @@ CLEANUP: } int run_test_with_multi_import_diff_ctx( - cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1, - cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2, - cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice, - uint32_t numBuffers, uint32_t bufferSize, bool use_fence, + cl_context context, cl_context context2, cl_command_queue cmd_queue1, + cl_command_queue cmd_queue2, clKernelWrapper *kernel1, + clKernelWrapper *kernel2, cl_kernel verify_kernel, cl_kernel verify_kernel2, + VulkanDevice &vkDevice, uint32_t numBuffers, uint32_t bufferSize, + bool use_fence, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType) { size_t global_work_size[1]; @@ -1087,7 +1084,6 @@ int run_test_with_multi_import_diff_ctx( clExternalExportableSemaphore *clCl2VkExternalSemaphore2 = nullptr; int err = CL_SUCCESS; int calc_max_iter; - bool withOffset; uint32_t pBufferSize; const std::vector @@ -1180,7 +1176,6 @@ int run_test_with_multi_import_diff_ctx( pBufferSize = bufferSize; VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize, vkExternalMemoryHandleType); - uint32_t interBufferOffset = (uint32_t)(vkBufferList[0].getSize()); for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) { @@ -1189,7 +1184,7 @@ int run_test_with_multi_import_diff_ctx( vkExternalMemoryHandleType)); std::vector pExternalMemory1; std::vector pExternalMemory2; - for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + for (int cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) { pExternalMemory1.push_back( new clExternalMemory(vkBufferListDeviceMemory[bIdx], @@ -1212,10 +1207,9 @@ int run_test_with_multi_import_diff_ctx( vkDescriptorSet.update(0, vkParamsBuffer); for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) { - size_t buffer_size = vkBufferList[bIdx].getSize(); 
vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); - for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + for (int cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) { buffers1[bIdx][cl_bIdx] = externalMemory1[bIdx][cl_bIdx] ->getExternalMemoryBuffer(); @@ -1288,7 +1282,7 @@ int run_test_with_multi_import_diff_ctx( test_error_and_cleanup(err, CLEANUP, "Failed to set kernel arg"); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clSetKernelArg( update_buffer_kernel1[launchIter], i + 1, @@ -1313,7 +1307,7 @@ int run_test_with_multi_import_diff_ctx( test_error_and_cleanup(err, CLEANUP, "Error: Failed to launch " "update_buffer_kernel, error\n"); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clEnqueueReleaseExternalMemObjectsKHRptr( cmd_queue1, 1, &buffers1[i][launchIter], 0, nullptr, @@ -1375,7 +1369,7 @@ int run_test_with_multi_import_diff_ctx( test_error_and_cleanup(err, CLEANUP, "Failed to set kernel arg"); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clSetKernelArg( update_buffer_kernel2[launchIter], i + 1, @@ -1400,7 +1394,7 @@ int run_test_with_multi_import_diff_ctx( test_error_and_cleanup(err, CLEANUP, "Error: Failed to launch " "update_buffer_kernel, error\n "); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clEnqueueReleaseExternalMemObjectsKHRptr( cmd_queue2, 1, &buffers2[i][launchIter], 0, nullptr, @@ -1448,7 +1442,7 @@ int run_test_with_multi_import_diff_ctx( "Error: Failed read output, error \n"); calc_max_iter = maxIter * 2 * (numBuffers + 1); - for (int i = 0; i < numBuffers; i++) + for (uint32_t i = 0; i < numBuffers; i++) { err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), (void *)&(buffers1[i][0])); @@ -1483,7 +1477,7 @@ int run_test_with_multi_import_diff_ctx( } } *error_3 = 0; - for (int i = 0; i < vkBufferList.size(); i++) + for (size_t i = 0; i < 
vkBufferList.size(); i++) { err = clSetKernelArg(verify_kernel2, 0, sizeof(cl_mem), (void *)&(buffers2[i][0])); @@ -1519,7 +1513,7 @@ int run_test_with_multi_import_diff_ctx( } for (size_t i = 0; i < vkBufferList.size(); i++) { - for (size_t j = 0; j < numImports; j++) + for (int j = 0; j < numImports; j++) { delete externalMemory1[i][j]; delete externalMemory2[i][j]; @@ -1597,14 +1591,7 @@ struct BufferTestBase : public VulkanTestBase int test_buffer_common(bool use_fence) { - int current_device = 0; - int device_count = 0; - int devices_prohibited = 0; cl_int errNum = CL_SUCCESS; - size_t extensionSize = 0; - const size_t bufsize = BUFFERSIZE; - char buf[BUFFERSIZE]; - char *extensions = NULL; clKernelWrapper verify_kernel; clKernelWrapper verify_kernel2; clKernelWrapper kernel[3] = { NULL, NULL, NULL }; @@ -1624,7 +1611,6 @@ struct BufferTestBase : public VulkanTestBase uint32_t numBuffersList[] = { 1, 2, 4 }; uint32_t bufferSizeList[] = { 4 * 1024, 64 * 1024, 2 * 1024 * 1024 }; - uint32_t bufferSizeListforOffset[] = { 256, 512, 1024 }; std::vector supportedSemaphoreTypes; @@ -1740,36 +1726,28 @@ struct BufferTestBase : public VulkanTestBase if (multiImport && !multiCtx) { errNum = run_test_with_multi_import_same_ctx( - context, (cl_command_queue &)cmd_queue1, - (cl_kernel *)&kernel, (cl_kernel &)verify_kernel, + context, cmd_queue1, kernel, verify_kernel, *vkDevice, numBuffers, bufferSize, use_fence, semaphoreType); } else if (multiImport && multiCtx) { errNum = run_test_with_multi_import_diff_ctx( - context, (cl_context &)context2, - (cl_command_queue &)cmd_queue1, - (cl_command_queue &)cmd_queue3, - (cl_kernel *)&kernel, (cl_kernel *)&kernel2, - (cl_kernel &)verify_kernel, verify_kernel2, - *vkDevice, numBuffers, bufferSize, use_fence, - semaphoreType); + context, context2, cmd_queue1, cmd_queue3, kernel, + kernel2, verify_kernel, verify_kernel2, *vkDevice, + numBuffers, bufferSize, use_fence, semaphoreType); } else if (numCQ == 2) { errNum = 
run_test_with_two_queue( - context, (cl_command_queue &)cmd_queue1, - (cl_command_queue &)cmd_queue2, - (cl_kernel *)&kernel, (cl_kernel &)verify_kernel, - *vkDevice, numBuffers + 1, bufferSize, use_fence, - semaphoreType); + context, cmd_queue1, cmd_queue2, kernel, + verify_kernel, *vkDevice, numBuffers + 1, + bufferSize, use_fence, semaphoreType); } else { errNum = run_test_with_one_queue( - context, (cl_command_queue &)cmd_queue1, - (cl_kernel *)&kernel, (cl_kernel &)verify_kernel, + context, cmd_queue1, kernel, verify_kernel, *vkDevice, numBuffers, bufferSize, semaphoreType, use_fence); } diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp index a73dde3f..0dc2fa8a 100644 --- a/test_conformance/vulkan/test_vulkan_interop_image.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp @@ -17,6 +17,7 @@ #include #include #include "harness/errorHelpers.h" +#include "harness/imageHelpers.h" #include "harness/os_helpers.h" #include @@ -55,7 +56,6 @@ struct Params uint32_t numImage2DDescriptors; }; -cl_uchar uuid[CL_UUID_SIZE_KHR]; cl_device_id deviceId = NULL; size_t max_width = MAX_2D_IMAGE_WIDTH; size_t max_height = MAX_2D_IMAGE_HEIGHT; @@ -137,6 +137,38 @@ const uint32_t num2DImagesList[] = { 1, 2, 4 }; const uint32_t widthList[] = { 4, 64, 183, 1024 }; const uint32_t heightList[] = { 4, 64, 365 }; +bool memcmp_images(const void *a, const void *b, size_t size, + cl_image_format format) +{ + if (format.image_channel_data_type == CL_FLOAT) + { + const float *a_float = static_cast(a); + const float *b_float = static_cast(b); + return !std::equal(a_float, a_float + size / sizeof(*a_float), b_float, + b_float + size / sizeof(*b_float), + [](float a, float b) { + if (isnan(a) && isnan(b)) return true; + return a == b; + }); + } + else if (format.image_channel_data_type == CL_HALF_FLOAT) + { + const cl_half *a_half = static_cast(a); + const cl_half *b_half = static_cast(b); + return 
!std::equal(a_half, a_half + size / sizeof(*a_half), b_half, + b_half + size / sizeof(*b_half), + [](cl_half a, cl_half b) { + if (is_half_nan(a) && is_half_nan(b)) + return true; + return a == b; + }); + } + else + { + return memcmp(a, b, size) != 0; + } +} + const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float, cl_kernel kernel_signed, cl_kernel kernel_unsigned) @@ -195,9 +227,10 @@ const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float, } int run_test_with_two_queue( - cl_context &context, cl_command_queue &cmd_queue1, - cl_command_queue &cmd_queue2, cl_kernel *kernel_unsigned, - cl_kernel *kernel_signed, cl_kernel *kernel_float, VulkanDevice &vkDevice, + cl_context context, cl_command_queue cmd_queue1, + cl_command_queue cmd_queue2, clKernelWrapper *kernel_unsigned, + clKernelWrapper *kernel_signed, clKernelWrapper *kernel_float, + VulkanDevice &vkDevice, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType) { cl_int err = CL_SUCCESS; @@ -408,7 +441,6 @@ int run_test_with_two_queue( } size_t totalImageMemSize = 0; - uint64_t interImageOffset = 0; { VulkanImage2D vkImage2D( vkDevice, vkFormat, width, height, @@ -464,7 +496,7 @@ int run_test_with_two_queue( cl_mem external_mem_image1[5]; cl_mem external_mem_image2[5]; - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < num2DImages; i++) { external_mem_image1[i] = externalMemory1[i] @@ -632,7 +664,8 @@ int run_test_with_two_queue( err |= clSetKernelArg(updateKernelCQ2, 7, sizeof(unsigned int), &numMipLevels); - for (int i = 0; i < num2DImages - 1; i++, ++j) + for (uint32_t i = 0; i < num2DImages - 1; + i++, ++j) { err = clSetKernelArg( updateKernelCQ1, j, sizeof(cl_mem), @@ -732,11 +765,8 @@ int run_test_with_two_queue( "Failed to signal CL semaphore\n"); } - unsigned int flags = 0; - size_t mipmapLevelOffset = 0; - cl_event eventReadImage = NULL; clFinish(cmd_queue2); - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < num2DImages; 
i++) { err = clEnqueueReadImage( cmd_queue1, external_mem_image2[i], CL_TRUE, @@ -747,8 +777,9 @@ int run_test_with_two_queue( "clEnqueueReadImage failed with" "error\n"); - if (memcmp(srcBufferPtr, dstBufferPtr, - srcBufSize)) + if (memcmp_images( + srcBufferPtr, dstBufferPtr, srcBufSize, + externalMemory2[i]->getImageFormat())) { log_info("Source and destination buffers " "don't match\n"); @@ -776,7 +807,7 @@ int run_test_with_two_queue( break; } } - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < num2DImages; i++) { delete vkImage2DListDeviceMemory1[i]; delete vkImage2DListDeviceMemory2[i]; @@ -817,9 +848,9 @@ CLEANUP: } int run_test_with_one_queue( - cl_context &context, cl_command_queue &cmd_queue1, - cl_kernel *kernel_unsigned, cl_kernel *kernel_signed, - cl_kernel *kernel_float, VulkanDevice &vkDevice, + cl_context context, cl_command_queue cmd_queue1, + clKernelWrapper *kernel_unsigned, clKernelWrapper *kernel_signed, + clKernelWrapper *kernel_float, VulkanDevice &vkDevice, VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType) { cl_int err = CL_SUCCESS; @@ -1027,7 +1058,6 @@ int run_test_with_one_queue( } } size_t totalImageMemSize = 0; - uint64_t interImageOffset = 0; { VulkanImage2D vkImage2D( vkDevice, vkFormat, width, height, @@ -1088,7 +1118,7 @@ int run_test_with_one_queue( cl_mem external_mem_image1[4]; cl_mem external_mem_image2[4]; - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < num2DImages; i++) { external_mem_image1[i] = externalMemory1[i] @@ -1223,7 +1253,7 @@ int run_test_with_one_queue( break; } int j = 0; - for (int i = 0; i < num2DImages; i++, ++j) + for (uint32_t i = 0; i < num2DImages; i++, ++j) { err = clSetKernelArg( updateKernelCQ1, j, sizeof(cl_mem), @@ -1289,10 +1319,7 @@ int run_test_with_one_queue( "Failed to signal CL semaphore\n"); } - unsigned int flags = 0; - size_t mipmapLevelOffset = 0; - cl_event eventReadImage = NULL; - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 
0; i < num2DImages; i++) { err = clEnqueueReadImage( cmd_queue1, external_mem_image2[i], CL_TRUE, @@ -1303,8 +1330,9 @@ int run_test_with_one_queue( "clEnqueueReadImage failed with" "error\n"); - if (memcmp(srcBufferPtr, dstBufferPtr, - srcBufSize)) + if (memcmp_images( + srcBufferPtr, dstBufferPtr, srcBufSize, + externalMemory2[i]->getImageFormat())) { log_info("Source and destination buffers " "don't match\n"); @@ -1332,7 +1360,7 @@ int run_test_with_one_queue( break; } } - for (int i = 0; i < num2DImages; i++) + for (uint32_t i = 0; i < num2DImages; i++) { delete vkImage2DListDeviceMemory1[i]; delete vkImage2DListDeviceMemory2[i]; @@ -1431,7 +1459,7 @@ struct ImageCommonTest : public VulkanTestBase log_info("clCreateCommandQueue2 successful \n"); - for (int i = 0; i < num_kernels; i++) + for (uint32_t i = 0; i < num_kernels; i++) { switch (i) { @@ -1482,7 +1510,7 @@ struct ImageCommonTest : public VulkanTestBase } const char *sourceTexts[num_kernel_types] = { source_1, source_2, source_3 }; - for (int k = 0; k < num_kernel_types; k++) + for (uint32_t k = 0; k < num_kernel_types; k++) { program_source_length = strlen(sourceTexts[k]); program[k] = clCreateProgramWithSource( @@ -1508,20 +1536,16 @@ struct ImageCommonTest : public VulkanTestBase { if (numCQ == 2) { - err = run_test_with_two_queue( - context, (cl_command_queue &)cmd_queue1, - (cl_command_queue &)cmd_queue2, - (cl_kernel *)kernel_unsigned, (cl_kernel *)kernel_signed, - (cl_kernel *)kernel_float, *vkDevice, - externalSemaphoreType); + err = run_test_with_two_queue(context, cmd_queue1, cmd_queue2, + kernel_unsigned, kernel_signed, + kernel_float, *vkDevice, + externalSemaphoreType); } else { err = run_test_with_one_queue( - context, (cl_command_queue &)cmd_queue1, - (cl_kernel *)kernel_unsigned, (cl_kernel *)kernel_signed, - (cl_kernel *)kernel_float, *vkDevice, - externalSemaphoreType); + context, cmd_queue1, kernel_unsigned, kernel_signed, + kernel_float, *vkDevice, externalSemaphoreType); } 
test_error(err, "func_name failed \n"); }