mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-26 08:49:02 +00:00
Merge branch 'main' into cl_khr_unified_svm
This commit is contained in:
26
.github/workflows/presubmit.yml
vendored
26
.github/workflows/presubmit.yml
vendored
@@ -10,7 +10,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
build-type: [Release]
|
build-type: [Release]
|
||||||
gl: [0]
|
gl: [0]
|
||||||
os: [ubuntu-22.04, macos-latest, windows-latest]
|
os: [ubuntu-22.04, macos-latest, windows-latest, windows-11-arm]
|
||||||
include:
|
include:
|
||||||
- os: ubuntu-22.04
|
- os: ubuntu-22.04
|
||||||
gl: 1
|
gl: 1
|
||||||
@@ -28,7 +28,7 @@ jobs:
|
|||||||
arch: android-aarch64
|
arch: android-aarch64
|
||||||
android_arch_abi: arm64-v8a
|
android_arch_abi: arm64-v8a
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v5
|
||||||
- name: Setup Ninja
|
- name: Setup Ninja
|
||||||
uses: seanmiddleditch/gha-setup-ninja@master
|
uses: seanmiddleditch/gha-setup-ninja@master
|
||||||
- name: Install Arm and AArch64 compilers
|
- name: Install Arm and AArch64 compilers
|
||||||
@@ -62,6 +62,10 @@ jobs:
|
|||||||
git checkout cl_khr_unified_svm
|
git checkout cl_khr_unified_svm
|
||||||
ln -s CL OpenCL # For OSX builds
|
ln -s CL OpenCL # For OSX builds
|
||||||
cd ..
|
cd ..
|
||||||
|
- name: Fetch SPIR-V Headers
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
git clone https://github.com/KhronosGroup/SPIRV-Headers.git
|
||||||
- name: Install Vulkan SDK
|
- name: Install Vulkan SDK
|
||||||
uses: humbletim/install-vulkan-sdk@main
|
uses: humbletim/install-vulkan-sdk@main
|
||||||
with:
|
with:
|
||||||
@@ -70,12 +74,13 @@ jobs:
|
|||||||
- name: Install Android NDK
|
- name: Install Android NDK
|
||||||
if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }}
|
if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }}
|
||||||
run: |
|
run: |
|
||||||
wget https://dl.google.com/android/repository/android-ndk-r27c-linux.zip -O android-ndk.zip
|
wget https://dl.google.com/android/repository/android-ndk-r28c-linux.zip -O android-ndk.zip
|
||||||
unzip android-ndk.zip -d $HOME
|
unzip android-ndk.zip -d $HOME
|
||||||
export ANDROID_NDK=$HOME/android-ndk-r27c
|
export ANDROID_NDK=$HOME/android-ndk-r28c
|
||||||
echo "ANDROID_NDK=$ANDROID_NDK" >> $GITHUB_ENV
|
echo "ANDROID_NDK=$ANDROID_NDK" >> $GITHUB_ENV
|
||||||
export ANDROID_ARCH_ABI=${{ matrix.android_arch_abi }}
|
export ANDROID_ARCH_ABI=${{ matrix.android_arch_abi }}
|
||||||
echo "ANDROID_ARCH_ABI=$ANDROID_ARCH_ABI" >> $GITHUB_ENV
|
echo "ANDROID_ARCH_ABI=$ANDROID_ARCH_ABI" >> $GITHUB_ENV
|
||||||
|
echo "ANDROID_PLATFORM=29" >> $GITHUB_ENV
|
||||||
- name: Prepare CMake Toolchain file
|
- name: Prepare CMake Toolchain file
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -108,7 +113,7 @@ jobs:
|
|||||||
if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }}
|
if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }}
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
echo "CMAKE_CONFIG_ARGS_ANDROID=-DCMAKE_ANDROID_ARCH_ABI=${ANDROID_ARCH_ABI}" >> $GITHUB_ENV
|
echo "CMAKE_ADDITIONAL_CONFIG_ARGS=-DCMAKE_ANDROID_ARCH_ABI=${ANDROID_ARCH_ABI} -DANDROID_PLATFORM=${ANDROID_PLATFORM}" >> $GITHUB_ENV
|
||||||
- name: Fetch and build OpenCL ICD Loader
|
- name: Fetch and build OpenCL ICD Loader
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -120,7 +125,7 @@ jobs:
|
|||||||
-DCMAKE_BUILD_TYPE=Release \
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
-DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
|
-DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
|
||||||
-DOPENCL_ICD_LOADER_HEADERS_DIR='${{ github.workspace }}'/OpenCL-Headers/ \
|
-DOPENCL_ICD_LOADER_HEADERS_DIR='${{ github.workspace }}'/OpenCL-Headers/ \
|
||||||
"${CMAKE_CONFIG_ARGS_ANDROID}"
|
${CMAKE_ADDITIONAL_CONFIG_ARGS}
|
||||||
cmake --build . --parallel
|
cmake --build . --parallel
|
||||||
- name: Fetch Vulkan Headers
|
- name: Fetch Vulkan Headers
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -150,6 +155,7 @@ jobs:
|
|||||||
cd build
|
cd build
|
||||||
if [[ ${RUNNER_OS} == "Windows" ]]; then
|
if [[ ${RUNNER_OS} == "Windows" ]]; then
|
||||||
CMAKE_OPENCL_LIBRARIES_OPTION="OpenCL"
|
CMAKE_OPENCL_LIBRARIES_OPTION="OpenCL"
|
||||||
|
CMAKE_ADDITIONAL_CONFIG_ARGS="-DD3D10_IS_SUPPORTED=ON -DD3D11_IS_SUPPORTED=ON"
|
||||||
else
|
else
|
||||||
CMAKE_OPENCL_LIBRARIES_OPTION="-lOpenCL"
|
CMAKE_OPENCL_LIBRARIES_OPTION="-lOpenCL"
|
||||||
if [[ '${{ matrix.arch }}' != android-* ]]; then
|
if [[ '${{ matrix.arch }}' != android-* ]]; then
|
||||||
@@ -158,8 +164,10 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
cmake .. -G Ninja \
|
cmake .. -G Ninja \
|
||||||
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
|
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
|
||||||
-DCMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache" \
|
-DCMAKE_C_COMPILER_LAUNCHER=sccache \
|
||||||
|
-DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
|
||||||
-DCL_INCLUDE_DIR='${{ github.workspace }}'/OpenCL-Headers \
|
-DCL_INCLUDE_DIR='${{ github.workspace }}'/OpenCL-Headers \
|
||||||
|
-DSPIRV_INCLUDE_DIR='${{ github.workspace }}'/SPIRV-Headers \
|
||||||
-DCL_LIB_DIR='${{ github.workspace }}'/OpenCL-ICD-Loader/build \
|
-DCL_LIB_DIR='${{ github.workspace }}'/OpenCL-ICD-Loader/build \
|
||||||
-DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
|
-DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
|
||||||
-DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \
|
-DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \
|
||||||
@@ -169,7 +177,7 @@ jobs:
|
|||||||
-DVULKAN_IS_SUPPORTED=ON \
|
-DVULKAN_IS_SUPPORTED=ON \
|
||||||
-DVULKAN_INCLUDE_DIR='${{ github.workspace }}'/Vulkan-Headers/include/ \
|
-DVULKAN_INCLUDE_DIR='${{ github.workspace }}'/Vulkan-Headers/include/ \
|
||||||
-DVULKAN_LIB_DIR='${{ github.workspace }}'/Vulkan-Loader/build/loader/ \
|
-DVULKAN_LIB_DIR='${{ github.workspace }}'/Vulkan-Loader/build/loader/ \
|
||||||
"${CMAKE_CONFIG_ARGS_ANDROID}"
|
${CMAKE_ADDITIONAL_CONFIG_ARGS}
|
||||||
cmake --build . --parallel
|
cmake --build . --parallel
|
||||||
formatcheck:
|
formatcheck:
|
||||||
name: Check code format
|
name: Check code format
|
||||||
@@ -177,7 +185,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Install packages
|
- name: Install packages
|
||||||
run: sudo apt install -y clang-format clang-format-14
|
run: sudo apt install -y clang-format clang-format-14
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Check code format
|
- name: Check code format
|
||||||
|
|||||||
@@ -59,6 +59,12 @@ else(CL_INCLUDE_DIR AND CL_LIB_DIR)
|
|||||||
message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR and -DCL_LIB_DIR")
|
message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR and -DCL_LIB_DIR")
|
||||||
endif(CL_INCLUDE_DIR AND CL_LIB_DIR)
|
endif(CL_INCLUDE_DIR AND CL_LIB_DIR)
|
||||||
|
|
||||||
|
# SPIRV_INCLUDE_DIR - path to dir with SPIR-V headers
|
||||||
|
if(NOT SPIRV_INCLUDE_DIR)
|
||||||
|
message(STATUS "SPIR-V headers haven't been found!")
|
||||||
|
message(FATAL_ERROR "Pass -DSPIRV_INCLUDE_DIR")
|
||||||
|
endif(NOT SPIRV_INCLUDE_DIR)
|
||||||
|
|
||||||
# CLConform_GL_LIBRARIES_DIR - path to OpenGL libraries
|
# CLConform_GL_LIBRARIES_DIR - path to OpenGL libraries
|
||||||
if(GL_IS_SUPPORTED AND CLConform_GL_LIBRARIES_DIR)
|
if(GL_IS_SUPPORTED AND CLConform_GL_LIBRARIES_DIR)
|
||||||
link_directories(${CLConform_GL_LIBRARIES_DIR})
|
link_directories(${CLConform_GL_LIBRARIES_DIR})
|
||||||
@@ -195,6 +201,7 @@ if(APPLE)
|
|||||||
endif(APPLE)
|
endif(APPLE)
|
||||||
|
|
||||||
include_directories(SYSTEM ${CL_INCLUDE_DIR})
|
include_directories(SYSTEM ${CL_INCLUDE_DIR})
|
||||||
|
include_directories(SYSTEM ${SPIRV_INCLUDE_DIR}/include)
|
||||||
include_directories(${CLConform_SOURCE_DIR}/test_common/harness
|
include_directories(${CLConform_SOURCE_DIR}/test_common/harness
|
||||||
${CLConform_SOURCE_DIR}/test_common/gles
|
${CLConform_SOURCE_DIR}/test_common/gles
|
||||||
${CLConform_SOURCE_DIR}/test_common/gl
|
${CLConform_SOURCE_DIR}/test_common/gl
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ Compiling the CTS requires the following CMake configuration options to be set:
|
|||||||
|
|
||||||
* `CL_INCLUDE_DIR` Points to the unified
|
* `CL_INCLUDE_DIR` Points to the unified
|
||||||
[OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers).
|
[OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers).
|
||||||
|
* `SPIRV_INCLUDE_DIR` Points to the unified
|
||||||
|
[SPIRV-Headers](https://github.com/KhronosGroup/SPIRV-Headers).
|
||||||
* `CL_LIB_DIR` Directory containing the OpenCL library to build against.
|
* `CL_LIB_DIR` Directory containing the OpenCL library to build against.
|
||||||
* `SPIRV_TOOLS_DIR` Directory containing the `spirv-as` and `spirv-val` binaries
|
* `SPIRV_TOOLS_DIR` Directory containing the `spirv-as` and `spirv-val` binaries
|
||||||
to be used in the CTS build process. Alternatively, the location to these binaries
|
to be used in the CTS build process. Alternatively, the location to these binaries
|
||||||
@@ -31,6 +33,7 @@ a build, and compile.
|
|||||||
```sh
|
```sh
|
||||||
git clone https://github.com/KhronosGroup/OpenCL-CTS.git
|
git clone https://github.com/KhronosGroup/OpenCL-CTS.git
|
||||||
git clone https://github.com/KhronosGroup/OpenCL-Headers.git
|
git clone https://github.com/KhronosGroup/OpenCL-Headers.git
|
||||||
|
git clone https://github.com/KhronosGroup/SPIRV-Headers.git
|
||||||
git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
|
git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
|
||||||
git clone https://github.com/KhronosGroup/SPIRV-Tools.git
|
git clone https://github.com/KhronosGroup/SPIRV-Tools.git
|
||||||
git clone https://github.com/KhronosGroup/SPIRV-Headers.git SPIRV-Tools/external/spirv-headers
|
git clone https://github.com/KhronosGroup/SPIRV-Headers.git SPIRV-Tools/external/spirv-headers
|
||||||
@@ -50,6 +53,7 @@ cmake --build SPIRV-Tools/build --config Release
|
|||||||
mkdir OpenCL-CTS/build
|
mkdir OpenCL-CTS/build
|
||||||
cmake -S OpenCL-CTS -B OpenCL-CTS/build \
|
cmake -S OpenCL-CTS -B OpenCL-CTS/build \
|
||||||
-DCL_INCLUDE_DIR=$PWD/OpenCL-Headers \
|
-DCL_INCLUDE_DIR=$PWD/OpenCL-Headers \
|
||||||
|
-DSPIRV_INCLUDE_DIR=$PWD/SPIRV-Headers \
|
||||||
-DCL_LIB_DIR=$PWD/OpenCL-ICD-Loader/build \
|
-DCL_LIB_DIR=$PWD/OpenCL-ICD-Loader/build \
|
||||||
-DSPIRV_TOOLS_DIR=$PWD/SPIRV-Tools/build/tools/ \
|
-DSPIRV_TOOLS_DIR=$PWD/SPIRV-Tools/build/tools/ \
|
||||||
-DOPENCL_LIBRARIES=OpenCL
|
-DOPENCL_LIBRARIES=OpenCL
|
||||||
|
|||||||
@@ -436,7 +436,14 @@ void *ThreadPool_WorkerFunc(void *p)
|
|||||||
|
|
||||||
// drop run count to 0
|
// drop run count to 0
|
||||||
gRunCount = 0;
|
gRunCount = 0;
|
||||||
|
#if defined(_M_IX86) || defined(_M_X64)
|
||||||
_mm_mfence();
|
_mm_mfence();
|
||||||
|
#elif defined(_M_ARM64)
|
||||||
|
__dmb(_ARM64_BARRIER_ISHST);
|
||||||
|
#else
|
||||||
|
#error Architecture needs an implementation
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
if (pthread_mutex_lock(&gAtomicLock))
|
if (pthread_mutex_lock(&gAtomicLock))
|
||||||
log_error(
|
log_error(
|
||||||
@@ -703,7 +710,13 @@ void ThreadPool_Exit(void)
|
|||||||
// http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
// http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||||
__sync_synchronize();
|
__sync_synchronize();
|
||||||
#elif defined(_MSC_VER)
|
#elif defined(_MSC_VER)
|
||||||
|
#if defined(_M_IX86) || defined(_M_X64)
|
||||||
_mm_mfence();
|
_mm_mfence();
|
||||||
|
#elif defined(_M_ARM64)
|
||||||
|
__dmb(_ARM64_BARRIER_ISHST);
|
||||||
|
#else
|
||||||
|
#error Architecture needs an implementation
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
|
#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -23,10 +23,10 @@
|
|||||||
|
|
||||||
#include <CL/cl_half.h>
|
#include <CL/cl_half.h>
|
||||||
|
|
||||||
#if defined(__SSE__) || defined(_MSC_VER)
|
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#if defined(__SSE2__) || defined(_MSC_VER)
|
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -110,7 +110,7 @@ static long lrintf_clamped(float f)
|
|||||||
volatile float x = f;
|
volatile float x = f;
|
||||||
float magicVal = magic[f < 0];
|
float magicVal = magic[f < 0];
|
||||||
|
|
||||||
#if defined(__SSE__) || defined(_WIN32)
|
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||||
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
||||||
// incorrectly
|
// incorrectly
|
||||||
__m128 v = _mm_set_ss(x);
|
__m128 v = _mm_set_ss(x);
|
||||||
@@ -150,7 +150,7 @@ static long lrint_clamped(double f)
|
|||||||
{
|
{
|
||||||
volatile double x = f;
|
volatile double x = f;
|
||||||
double magicVal = magic[f < 0];
|
double magicVal = magic[f < 0];
|
||||||
#if defined(__SSE2__) || (defined(_MSC_VER))
|
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||||
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
||||||
// incorrectly
|
// incorrectly
|
||||||
__m128d v = _mm_set_sd(x);
|
__m128d v = _mm_set_sd(x);
|
||||||
|
|||||||
@@ -387,8 +387,7 @@ static float Ulp_Error_Half_Float(float test, double reference)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// reference is a normal power of two or a zero
|
// reference is a normal power of two or a zero
|
||||||
int ulp_exp =
|
int ulp_exp = HALF_MANT_DIG - std::max(ilogb(reference), HALF_MIN_EXP);
|
||||||
HALF_MANT_DIG - 1 - std::max(ilogb(reference) - 1, HALF_MIN_EXP - 1);
|
|
||||||
|
|
||||||
// Scale the exponent of the error
|
// Scale the exponent of the error
|
||||||
return (float)scalbn(testVal - reference, ulp_exp);
|
return (float)scalbn(testVal - reference, ulp_exp);
|
||||||
@@ -469,8 +468,7 @@ float Ulp_Error(float test, double reference)
|
|||||||
|
|
||||||
// reference is a normal power of two or a zero
|
// reference is a normal power of two or a zero
|
||||||
// The unbiased exponent of the ulp unit place
|
// The unbiased exponent of the ulp unit place
|
||||||
int ulp_exp =
|
int ulp_exp = FLT_MANT_DIG - std::max(ilogb(reference), FLT_MIN_EXP);
|
||||||
FLT_MANT_DIG - 1 - std::max(ilogb(reference) - 1, FLT_MIN_EXP - 1);
|
|
||||||
|
|
||||||
// Scale the exponent of the error
|
// Scale the exponent of the error
|
||||||
return (float)scalbn(testVal - reference, ulp_exp);
|
return (float)scalbn(testVal - reference, ulp_exp);
|
||||||
@@ -553,8 +551,7 @@ float Ulp_Error_Double(double test, long double reference)
|
|||||||
|
|
||||||
// reference is a normal power of two or a zero
|
// reference is a normal power of two or a zero
|
||||||
// The unbiased exponent of the ulp unit place
|
// The unbiased exponent of the ulp unit place
|
||||||
int ulp_exp =
|
int ulp_exp = DBL_MANT_DIG - std::max(ilogbl(reference), DBL_MIN_EXP);
|
||||||
DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
|
|
||||||
|
|
||||||
// Scale the exponent of the error
|
// Scale the exponent of the error
|
||||||
float result = (float)scalbnl(testVal - reference, ulp_exp);
|
float result = (float)scalbnl(testVal - reference, ulp_exp);
|
||||||
|
|||||||
@@ -37,36 +37,44 @@ typedef int FPU_mode_type;
|
|||||||
#else
|
#else
|
||||||
typedef int64_t FPU_mode_type;
|
typedef int64_t FPU_mode_type;
|
||||||
#endif
|
#endif
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||||
|| defined(__MINGW32__)
|
|| defined(_M_X64) || defined(__MINGW32__)
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
|
#elif defined(_M_ARM64)
|
||||||
|
#include <intrin.h>
|
||||||
#elif defined(__PPC__)
|
#elif defined(__PPC__)
|
||||||
#include <fpu_control.h>
|
#include <fpu_control.h>
|
||||||
extern __thread fpu_control_t fpu_control;
|
extern __thread fpu_control_t fpu_control;
|
||||||
#elif defined(__mips__)
|
#elif defined(__mips__)
|
||||||
#include "mips/m32c1.h"
|
#include "mips/m32c1.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Set the reference hardware floating point unit to FTZ mode
|
// Set the reference hardware floating point unit to FTZ mode
|
||||||
inline void ForceFTZ(FPU_mode_type *mode)
|
inline void ForceFTZ(FPU_mode_type *oldMode)
|
||||||
{
|
{
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||||
|| defined(__MINGW32__)
|
|| defined(_M_X64) || defined(__MINGW32__)
|
||||||
*mode = _mm_getcsr();
|
*oldMode = _mm_getcsr();
|
||||||
_mm_setcsr(*mode | 0x8040);
|
_mm_setcsr(*oldMode | 0x8040);
|
||||||
#elif defined(__PPC__)
|
#elif defined(__PPC__)
|
||||||
*mode = fpu_control;
|
*oldMode = fpu_control;
|
||||||
fpu_control |= _FPU_MASK_NI;
|
fpu_control |= _FPU_MASK_NI;
|
||||||
#elif defined(__arm__)
|
#elif defined(__arm__)
|
||||||
unsigned fpscr;
|
unsigned fpscr;
|
||||||
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
|
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
|
||||||
*mode = fpscr;
|
*oldMode = fpscr;
|
||||||
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
|
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
|
||||||
// Add 64 bit support
|
// Add 64 bit support
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__) // Clang
|
||||||
uint64_t fpscr;
|
uint64_t fpscr;
|
||||||
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
|
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
|
||||||
*mode = fpscr;
|
*oldMode = fpscr;
|
||||||
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
|
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
|
||||||
|
#elif defined(_M_ARM64) // Visual Studio
|
||||||
|
uint64_t fpscr;
|
||||||
|
fpscr = _ReadStatusReg(ARM64_FPSR);
|
||||||
|
*oldMode = fpscr;
|
||||||
|
_WriteStatusReg(ARM64_FPCR, fpscr | (1U << 24));
|
||||||
#elif defined(__mips__)
|
#elif defined(__mips__)
|
||||||
fpa_bissr(FPA_CSR_FS);
|
fpa_bissr(FPA_CSR_FS);
|
||||||
#else
|
#else
|
||||||
@@ -75,26 +83,31 @@ inline void ForceFTZ(FPU_mode_type *mode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Disable the denorm flush to zero
|
// Disable the denorm flush to zero
|
||||||
inline void DisableFTZ(FPU_mode_type *mode)
|
inline void DisableFTZ(FPU_mode_type *oldMode)
|
||||||
{
|
{
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||||
|| defined(__MINGW32__)
|
|| defined(_M_X64) || defined(__MINGW32__)
|
||||||
*mode = _mm_getcsr();
|
*oldMode = _mm_getcsr();
|
||||||
_mm_setcsr(*mode & ~0x8040);
|
_mm_setcsr(*oldMode & ~0x8040);
|
||||||
#elif defined(__PPC__)
|
#elif defined(__PPC__)
|
||||||
*mode = fpu_control;
|
*mode = fpu_control;
|
||||||
fpu_control &= ~_FPU_MASK_NI;
|
fpu_control &= ~_FPU_MASK_NI;
|
||||||
#elif defined(__arm__)
|
#elif defined(__arm__)
|
||||||
unsigned fpscr;
|
unsigned fpscr;
|
||||||
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
|
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
|
||||||
*mode = fpscr;
|
*oldMode = fpscr;
|
||||||
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
|
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
|
||||||
// Add 64 bit support
|
// Add 64 bit support
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__) // Clang
|
||||||
uint64_t fpscr;
|
uint64_t fpscr;
|
||||||
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
|
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
|
||||||
*mode = fpscr;
|
*oldMode = fpscr;
|
||||||
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
|
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
|
||||||
|
#elif defined(_M_ARM64) // Visual Studio
|
||||||
|
uint64_t fpscr;
|
||||||
|
fpscr = _ReadStatusReg(ARM64_FPSR);
|
||||||
|
*oldMode = fpscr;
|
||||||
|
_WriteStatusReg(ARM64_FPCR, fpscr & ~(1U << 24));
|
||||||
#elif defined(__mips__)
|
#elif defined(__mips__)
|
||||||
fpa_bicsr(FPA_CSR_FS);
|
fpa_bicsr(FPA_CSR_FS);
|
||||||
#else
|
#else
|
||||||
@@ -105,16 +118,18 @@ inline void DisableFTZ(FPU_mode_type *mode)
|
|||||||
// Restore the reference hardware to floating point state indicated by *mode
|
// Restore the reference hardware to floating point state indicated by *mode
|
||||||
inline void RestoreFPState(FPU_mode_type *mode)
|
inline void RestoreFPState(FPU_mode_type *mode)
|
||||||
{
|
{
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||||
|| defined(__MINGW32__)
|
|| defined(_M_X64) || defined(__MINGW32__)
|
||||||
_mm_setcsr(*mode);
|
_mm_setcsr(*mode);
|
||||||
#elif defined(__PPC__)
|
#elif defined(__PPC__)
|
||||||
fpu_control = *mode;
|
fpu_control = *mode;
|
||||||
#elif defined(__arm__)
|
#elif defined(__arm__)
|
||||||
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
|
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
|
||||||
// Add 64 bit support
|
// Add 64 bit support
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__) // Clang
|
||||||
__asm__ volatile("msr fpcr, %0" ::"r"(*mode));
|
__asm__ volatile("msr fpcr, %0" ::"r"(*mode));
|
||||||
|
#elif defined(_M_ARM64) // Visual Studio
|
||||||
|
_WriteStatusReg(ARM64_FPCR, *mode);
|
||||||
#elif defined(__mips__)
|
#elif defined(__mips__)
|
||||||
// Mips runs by default with DAZ=1 FTZ=1
|
// Mips runs by default with DAZ=1 FTZ=1
|
||||||
#else
|
#else
|
||||||
|
|||||||
35
test_common/harness/mathHelpers.h
Normal file
35
test_common/harness/mathHelpers.h
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2025 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
#ifndef _mathHelpers_h
|
||||||
|
#define _mathHelpers_h
|
||||||
|
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
#include <OpenCL/cl_platform.h>
|
||||||
|
#else
|
||||||
|
#include <CL/cl_platform.h>
|
||||||
|
#endif
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
template <typename T> inline bool isnan_fp(const T &v) { return std::isnan(v); }
|
||||||
|
|
||||||
|
template <> inline bool isnan_fp<cl_half>(const cl_half &v)
|
||||||
|
{
|
||||||
|
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||||
|
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
||||||
|
return (h_exp == 0x1F && h_mant != 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _mathHelpers_h
|
||||||
@@ -786,7 +786,9 @@ int __builtin_clz(unsigned int pattern)
|
|||||||
#endif // !__has_builtin(__builtin_clz)
|
#endif // !__has_builtin(__builtin_clz)
|
||||||
|
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
|
#if !defined(_M_ARM64)
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
int usleep(int usec)
|
int usleep(int usec)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -35,6 +35,8 @@ std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
|
|||||||
bool gDisableSPIRVValidation = false;
|
bool gDisableSPIRVValidation = false;
|
||||||
std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR;
|
std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR;
|
||||||
unsigned gNumWorkerThreads;
|
unsigned gNumWorkerThreads;
|
||||||
|
bool gListTests = false;
|
||||||
|
bool gWimpyMode = false;
|
||||||
|
|
||||||
void helpInfo()
|
void helpInfo()
|
||||||
{
|
{
|
||||||
@@ -49,6 +51,12 @@ void helpInfo()
|
|||||||
spir-v Use SPIR-V offline compilation
|
spir-v Use SPIR-V offline compilation
|
||||||
--num-worker-threads <num>
|
--num-worker-threads <num>
|
||||||
Select parallel execution with the specified number of worker threads.
|
Select parallel execution with the specified number of worker threads.
|
||||||
|
--list
|
||||||
|
List sub-tests
|
||||||
|
-w, --wimpy
|
||||||
|
Enable wimpy mode. It does not impact all tests. Impacted tests will run
|
||||||
|
with a very small subset of the tests. This option should not be used
|
||||||
|
for conformance submission (default: disabled).
|
||||||
|
|
||||||
For offline compilation (binary and spir-v modes) only:
|
For offline compilation (binary and spir-v modes) only:
|
||||||
--compilation-cache-mode <cache-mode>
|
--compilation-cache-mode <cache-mode>
|
||||||
@@ -104,6 +112,16 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
|
|||||||
// option and print its own help.
|
// option and print its own help.
|
||||||
helpInfo();
|
helpInfo();
|
||||||
}
|
}
|
||||||
|
else if (!strcmp(argv[i], "--list") || !strcmp(argv[i], "-list"))
|
||||||
|
{
|
||||||
|
delArg++;
|
||||||
|
gListTests = true;
|
||||||
|
}
|
||||||
|
else if (!strcmp(argv[i], "--wimpy") || !strcmp(argv[i], "-w"))
|
||||||
|
{
|
||||||
|
delArg++;
|
||||||
|
gWimpyMode = true;
|
||||||
|
}
|
||||||
else if (!strcmp(argv[i], "--compilation-mode"))
|
else if (!strcmp(argv[i], "--compilation-mode"))
|
||||||
{
|
{
|
||||||
delArg++;
|
delArg++;
|
||||||
|
|||||||
@@ -40,6 +40,8 @@ extern std::string gCompilationCachePath;
|
|||||||
extern std::string gCompilationProgram;
|
extern std::string gCompilationProgram;
|
||||||
extern bool gDisableSPIRVValidation;
|
extern bool gDisableSPIRVValidation;
|
||||||
extern std::string gSPIRVValidator;
|
extern std::string gSPIRVValidator;
|
||||||
|
extern bool gListTests;
|
||||||
|
extern bool gWimpyMode;
|
||||||
|
|
||||||
extern int parseCustomParam(int argc, const char *argv[],
|
extern int parseCustomParam(int argc, const char *argv[],
|
||||||
const char *ignore = 0);
|
const char *ignore = 0);
|
||||||
|
|||||||
@@ -193,7 +193,8 @@ RoundingMode get_round(void)
|
|||||||
// basic_test_conversions.c in which case, these function are at
|
// basic_test_conversions.c in which case, these function are at
|
||||||
// liberty to do nothing.
|
// liberty to do nothing.
|
||||||
//
|
//
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||||
|
|| defined(_M_X64)
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#elif defined(__PPC__)
|
#elif defined(__PPC__)
|
||||||
#include <fpu_control.h>
|
#include <fpu_control.h>
|
||||||
@@ -203,18 +204,24 @@ RoundingMode get_round(void)
|
|||||||
void *FlushToZero(void)
|
void *FlushToZero(void)
|
||||||
{
|
{
|
||||||
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
|
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||||
|
|| defined(_M_X64)
|
||||||
union {
|
union {
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
void *p;
|
void *p;
|
||||||
} u = { _mm_getcsr() };
|
} u = { _mm_getcsr() };
|
||||||
_mm_setcsr(u.i | 0x8040);
|
_mm_setcsr(u.i | 0x8040);
|
||||||
return u.p;
|
return u.p;
|
||||||
#elif defined(__arm__) || defined(__aarch64__)
|
#elif defined(__arm__) || defined(__aarch64__) // Clang
|
||||||
int64_t fpscr;
|
int64_t fpscr;
|
||||||
_FPU_GETCW(fpscr);
|
_FPU_GETCW(fpscr);
|
||||||
_FPU_SETCW(fpscr | FPSCR_FZ);
|
_FPU_SETCW(fpscr | FPSCR_FZ);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
#elif defined(_M_ARM64) // Visual Studio
|
||||||
|
uint64_t fpscr;
|
||||||
|
fpscr = _ReadStatusReg(ARM64_FPSR);
|
||||||
|
_WriteStatusReg(ARM64_FPCR, fpscr | (1U << 24));
|
||||||
|
return NULL;
|
||||||
#elif defined(__PPC__)
|
#elif defined(__PPC__)
|
||||||
fpu_control_t flags = 0;
|
fpu_control_t flags = 0;
|
||||||
_FPU_GETCW(flags);
|
_FPU_GETCW(flags);
|
||||||
@@ -237,16 +244,21 @@ void *FlushToZero(void)
|
|||||||
void UnFlushToZero(void *p)
|
void UnFlushToZero(void *p)
|
||||||
{
|
{
|
||||||
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
|
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||||
|
|| defined(_M_X64)
|
||||||
union {
|
union {
|
||||||
void *p;
|
void *p;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
} u = { p };
|
} u = { p };
|
||||||
_mm_setcsr(u.i);
|
_mm_setcsr(u.i);
|
||||||
#elif defined(__arm__) || defined(__aarch64__)
|
#elif defined(__arm__) || defined(__aarch64__) // Clang
|
||||||
int64_t fpscr;
|
int64_t fpscr;
|
||||||
_FPU_GETCW(fpscr);
|
_FPU_GETCW(fpscr);
|
||||||
_FPU_SETCW(fpscr & ~FPSCR_FZ);
|
_FPU_SETCW(fpscr & ~FPSCR_FZ);
|
||||||
|
#elif defined(_M_ARM64) // Visual Studio
|
||||||
|
uint64_t fpscr;
|
||||||
|
fpscr = _ReadStatusReg(ARM64_FPSR);
|
||||||
|
_WriteStatusReg(ARM64_FPCR, fpscr & ~(1U << 24));
|
||||||
#elif defined(__PPC__)
|
#elif defined(__PPC__)
|
||||||
fpu_control_t flags = 0;
|
fpu_control_t flags = 0;
|
||||||
_FPU_GETCW(flags);
|
_FPU_GETCW(flags);
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
#include <set>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@@ -169,6 +170,19 @@ void version_expected_info(const char *test_name, const char *api_name,
|
|||||||
"reports %s version %s)\n",
|
"reports %s version %s)\n",
|
||||||
test_name, api_name, expected_version, api_name, device_version);
|
test_name, api_name, expected_version, api_name, device_version);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void list_tests(int testNum, test_definition testList[])
|
||||||
|
{
|
||||||
|
std::set<std::string> names;
|
||||||
|
for (int i = 0; i < testNum; i++)
|
||||||
|
{
|
||||||
|
names.insert(testList[i].name);
|
||||||
|
}
|
||||||
|
for (const auto &name : names)
|
||||||
|
{
|
||||||
|
log_info("\t%s\n", name.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
|
int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
|
||||||
test_definition testList[],
|
test_definition testList[],
|
||||||
int forceNoContextCreation,
|
int forceNoContextCreation,
|
||||||
@@ -197,7 +211,10 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
|
|||||||
if (env_mode != NULL)
|
if (env_mode != NULL)
|
||||||
{
|
{
|
||||||
based_on_env_var = 1;
|
based_on_env_var = 1;
|
||||||
if (strcmp(env_mode, "gpu") == 0
|
if (strcmp(env_mode, "all") == 0
|
||||||
|
|| strcmp(env_mode, "CL_DEVICE_TYPE_ALL") == 0)
|
||||||
|
device_type = CL_DEVICE_TYPE_ALL;
|
||||||
|
else if (strcmp(env_mode, "gpu") == 0
|
||||||
|| strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0)
|
|| strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0)
|
||||||
device_type = CL_DEVICE_TYPE_GPU;
|
device_type = CL_DEVICE_TYPE_GPU;
|
||||||
else if (strcmp(env_mode, "cpu") == 0
|
else if (strcmp(env_mode, "cpu") == 0
|
||||||
@@ -255,10 +272,23 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
|
|||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Special case: just list the tests */
|
if (gListTests)
|
||||||
if ((argc > 1)
|
{
|
||||||
&& (!strcmp(argv[1], "-list") || !strcmp(argv[1], "-h")
|
list_tests(testNum, testList);
|
||||||
|| !strcmp(argv[1], "--help")))
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
gWimpyMode |= (getenv("CL_WIMPY_MODE") != nullptr);
|
||||||
|
if (gWimpyMode)
|
||||||
|
{
|
||||||
|
log_info("\n");
|
||||||
|
log_info("**************************\n");
|
||||||
|
log_info("*** Wimpy mode enabled ***\n");
|
||||||
|
log_info("**************************\n");
|
||||||
|
log_info("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((argc > 1) && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")))
|
||||||
{
|
{
|
||||||
char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
|
char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
|
||||||
|
|
||||||
@@ -271,7 +301,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
|
|||||||
"(default 0).\n");
|
"(default 0).\n");
|
||||||
log_info("\tid<num>\t\tIndicates device at index <num> should be used "
|
log_info("\tid<num>\t\tIndicates device at index <num> should be used "
|
||||||
"(default 0).\n");
|
"(default 0).\n");
|
||||||
log_info("\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> "
|
log_info("\t<device_type>\tall|cpu|gpu|accelerator|<CL_DEVICE_TYPE_*> "
|
||||||
"(default CL_DEVICE_TYPE_DEFAULT)\n");
|
"(default CL_DEVICE_TYPE_DEFAULT)\n");
|
||||||
log_info("\n");
|
log_info("\n");
|
||||||
log_info("\tNOTE: You may pass environment variable "
|
log_info("\tNOTE: You may pass environment variable "
|
||||||
@@ -281,10 +311,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
|
|||||||
|
|
||||||
log_info("\n");
|
log_info("\n");
|
||||||
log_info("Test names:\n");
|
log_info("Test names:\n");
|
||||||
for (int i = 0; i < testNum; i++)
|
list_tests(testNum, testList);
|
||||||
{
|
|
||||||
log_info("\t%s\n", testList[i].name);
|
|
||||||
}
|
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -320,7 +347,13 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
|
|||||||
/* Do we have a CPU/GPU specification? */
|
/* Do we have a CPU/GPU specification? */
|
||||||
if (argc > 1)
|
if (argc > 1)
|
||||||
{
|
{
|
||||||
if (strcmp(argv[argc - 1], "gpu") == 0
|
if (strcmp(argv[argc - 1], "all") == 0
|
||||||
|
|| strcmp(argv[argc - 1], "CL_DEVICE_TYPE_ALL") == 0)
|
||||||
|
{
|
||||||
|
device_type = CL_DEVICE_TYPE_ALL;
|
||||||
|
argc--;
|
||||||
|
}
|
||||||
|
else if (strcmp(argv[argc - 1], "gpu") == 0
|
||||||
|| strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0)
|
|| strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0)
|
||||||
{
|
{
|
||||||
device_type = CL_DEVICE_TYPE_GPU;
|
device_type = CL_DEVICE_TYPE_GPU;
|
||||||
@@ -376,6 +409,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
|
|||||||
|
|
||||||
switch (device_type)
|
switch (device_type)
|
||||||
{
|
{
|
||||||
|
case CL_DEVICE_TYPE_ALL: log_info("Requesting any device "); break;
|
||||||
case CL_DEVICE_TYPE_GPU: log_info("Requesting GPU device "); break;
|
case CL_DEVICE_TYPE_GPU: log_info("Requesting GPU device "); break;
|
||||||
case CL_DEVICE_TYPE_CPU: log_info("Requesting CPU device "); break;
|
case CL_DEVICE_TYPE_CPU: log_info("Requesting CPU device "); break;
|
||||||
case CL_DEVICE_TYPE_ACCELERATOR:
|
case CL_DEVICE_TYPE_ACCELERATOR:
|
||||||
|
|||||||
@@ -28,8 +28,8 @@ public:
|
|||||||
Version(): m_major(0), m_minor(0) {}
|
Version(): m_major(0), m_minor(0) {}
|
||||||
|
|
||||||
Version(cl_uint major, cl_uint minor): m_major(major), m_minor(minor) {}
|
Version(cl_uint major, cl_uint minor): m_major(major), m_minor(minor) {}
|
||||||
int major() const { return m_major; }
|
int get_major() const { return m_major; }
|
||||||
int minor() const { return m_minor; }
|
int get_minor() const { return m_minor; }
|
||||||
bool operator>(const Version &rhs) const
|
bool operator>(const Version &rhs) const
|
||||||
{
|
{
|
||||||
return to_uint() > rhs.to_uint();
|
return to_uint() > rhs.to_uint();
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
set(MODULE_NAME API)
|
set(MODULE_NAME API)
|
||||||
|
|
||||||
|
find_package(Python3 COMPONENTS Interpreter QUIET)
|
||||||
|
|
||||||
set(${MODULE_NAME}_SOURCES
|
set(${MODULE_NAME}_SOURCES
|
||||||
main.cpp
|
main.cpp
|
||||||
negative_platform.cpp
|
negative_platform.cpp
|
||||||
@@ -40,6 +42,20 @@ set(${MODULE_NAME}_SOURCES
|
|||||||
test_pipe_properties_queries.cpp
|
test_pipe_properties_queries.cpp
|
||||||
test_wg_suggested_local_work_size.cpp
|
test_wg_suggested_local_work_size.cpp
|
||||||
test_device_command_queue.cpp
|
test_device_command_queue.cpp
|
||||||
|
test_spirv_queries.cpp
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def
|
||||||
)
|
)
|
||||||
|
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def
|
||||||
|
COMMENT "Generating spirv_capability_deps.def..."
|
||||||
|
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_spirv_capability_deps.py
|
||||||
|
--grammar "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json"
|
||||||
|
--output "${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def"
|
||||||
|
DEPENDS generate_spirv_capability_deps.py "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json"
|
||||||
|
USES_TERMINAL
|
||||||
|
VERBATIM)
|
||||||
|
|
||||||
include(../CMakeCommon.txt)
|
include(../CMakeCommon.txt)
|
||||||
|
|
||||||
|
target_include_directories(${${MODULE_NAME}_OUT} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
|||||||
102
test_conformance/api/generate_spirv_capability_deps.py
Normal file
102
test_conformance/api/generate_spirv_capability_deps.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
#####################################################################
|
||||||
|
# Copyright (c) 2025 The Khronos Group Inc. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#####################################################################
|
||||||
|
|
||||||
|
"""
|
||||||
|
Generates a file describing the SPIR-V extension dependencies or SPIR-V version
|
||||||
|
dependencies for a SPIR-V capability. This can be used to ensure that if support
|
||||||
|
for a SPIR-V capability is reported, the necessary SPIR-V extensions or SPIR-V
|
||||||
|
version is also supported.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
|
||||||
|
header_text = """\
|
||||||
|
//
|
||||||
|
// Copyright (c) 2025 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
// This file is generated from the SPIR-V JSON grammar file.
|
||||||
|
// Please do not edit it directly!
|
||||||
|
"""
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description='Generate SPIR-V extension and version dependencies for SPIR-V capabilities')
|
||||||
|
|
||||||
|
parser.add_argument('--grammar', metavar='<path>',
|
||||||
|
type=str, required=True,
|
||||||
|
help='input JSON grammar file')
|
||||||
|
parser.add_argument('--output', metavar='<path>',
|
||||||
|
type=str, required=False,
|
||||||
|
help='output file path (default: stdout)')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
dependencies = {}
|
||||||
|
capabilities = []
|
||||||
|
with open(args.grammar) as json_file:
|
||||||
|
grammar_json = json.loads(json_file.read())
|
||||||
|
for operand_kind in grammar_json['operand_kinds']:
|
||||||
|
if operand_kind['kind'] == 'Capability':
|
||||||
|
for cap in operand_kind['enumerants']:
|
||||||
|
capname = cap['enumerant']
|
||||||
|
capabilities.append(capname)
|
||||||
|
dependencies[capname] = {}
|
||||||
|
dependencies[capname]['extensions'] = cap['extensions'] if 'extensions' in cap else []
|
||||||
|
dependencies[capname]['version'] = ("SPIR-V_" + cap['version']) if 'version' in cap and cap['version'] != 'None' else ""
|
||||||
|
|
||||||
|
capabilities.sort()
|
||||||
|
|
||||||
|
output = []
|
||||||
|
output.append(header_text)
|
||||||
|
output.append("// clang-format off")
|
||||||
|
if False:
|
||||||
|
for cap in capabilities:
|
||||||
|
deps = dependencies[cap]
|
||||||
|
extensions_str = ', '.join(f'"{ext}"' for ext in deps['extensions'])
|
||||||
|
|
||||||
|
output.append('SPIRV_CAPABILITY_DEPENDENCIES( {}, {{{}}}, "{}" )'.format(
|
||||||
|
cap, extensions_str, deps['version']))
|
||||||
|
else:
|
||||||
|
for cap in capabilities:
|
||||||
|
deps = dependencies[cap]
|
||||||
|
if deps['version'] != "":
|
||||||
|
output.append('SPIRV_CAPABILITY_VERSION_DEPENDENCY( {}, "{}" )'.format(cap, deps['version']))
|
||||||
|
for ext in deps['extensions']:
|
||||||
|
output.append('SPIRV_CAPABILITY_EXTENSION_DEPENDENCY( {}, "{}" )'.format(cap, ext))
|
||||||
|
output.append("// clang-format on")
|
||||||
|
|
||||||
|
if args.output:
|
||||||
|
with open(args.output, 'w') as output_file:
|
||||||
|
output_file.write('\n'.join(output))
|
||||||
|
else:
|
||||||
|
print('\n'.join(output))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
@@ -1088,10 +1088,12 @@ REGISTER_TEST(min_max_image_buffer_size)
|
|||||||
pixelBytes = maxAllocSize / maxDimensionPixels;
|
pixelBytes = maxAllocSize / maxDimensionPixels;
|
||||||
if (pixelBytes == 0)
|
if (pixelBytes == 0)
|
||||||
{
|
{
|
||||||
log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than "
|
log_info(
|
||||||
"CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image "
|
"Note, the value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is %zu pixels, "
|
||||||
"of maximum size!\n");
|
"therefore the size of the allocated image may be larger than the "
|
||||||
return -1;
|
"scaled CL_DEVICE_MAX_MEM_ALLOC_SIZE of %" PRIu64 " bytes.\n",
|
||||||
|
maxDimensionPixels, maxAllocSize);
|
||||||
|
pixelBytes = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
error = -1;
|
error = -1;
|
||||||
|
|||||||
@@ -87,6 +87,35 @@ const char *sample_two_kernel_program[] = {
|
|||||||
"\n"
|
"\n"
|
||||||
"}\n" };
|
"}\n" };
|
||||||
|
|
||||||
|
const char *sample_mem_obj_size_test_kernel = R"(
|
||||||
|
__kernel void mem_obj_size_test(__global int *src, __global int *dst)
|
||||||
|
{
|
||||||
|
size_t tid = get_global_id(0);
|
||||||
|
dst[tid] = src[tid];
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
const char *sample_local_size_test_kernel = R"(
|
||||||
|
__kernel void local_size_test(__local int *src, __global int *dst)
|
||||||
|
{
|
||||||
|
size_t tid = get_global_id(0);
|
||||||
|
dst[tid] = src[tid];
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
const char *sample_read_only_image_test_kernel = R"(
|
||||||
|
__kernel void read_only_image_test(__write_only image2d_t img, __global uint4 *src)
|
||||||
|
{
|
||||||
|
write_imageui(img, (int2)(get_global_id(0), get_global_id(1)), src[0]);
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
const char *sample_write_only_image_test_kernel = R"(
|
||||||
|
__kernel void write_only_image_test(__read_only image2d_t src, __global uint4 *dst)
|
||||||
|
{
|
||||||
|
dst[0]=read_imageui(src, (int2)(get_global_id(0), get_global_id(1)));
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
REGISTER_TEST(get_kernel_info)
|
REGISTER_TEST(get_kernel_info)
|
||||||
{
|
{
|
||||||
@@ -669,7 +698,7 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
|
|||||||
test_error(error,
|
test_error(error,
|
||||||
"Unable to get sample_image_test kernel for built program");
|
"Unable to get sample_image_test kernel for built program");
|
||||||
|
|
||||||
std::vector<cl_uchar> mem_data(size_dim * size_dim);
|
std::vector<cl_uchar> mem_data(size_dim * size_dim * 4);
|
||||||
buffer = clCreateBuffer(context, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
|
buffer = clCreateBuffer(context, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
|
||||||
sizeof(cl_int) * size_dim, mem_data.data(), &error);
|
sizeof(cl_int) * size_dim, mem_data.data(), &error);
|
||||||
test_error(error, "clCreateBuffer failed");
|
test_error(error, "clCreateBuffer failed");
|
||||||
@@ -704,3 +733,169 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
|
|||||||
|
|
||||||
return TEST_PASS;
|
return TEST_PASS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(negative_invalid_arg_mem_obj)
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
clProgramWrapper program;
|
||||||
|
clKernelWrapper mem_obj_arg_kernel;
|
||||||
|
|
||||||
|
// Setup the test
|
||||||
|
error =
|
||||||
|
create_single_kernel_helper(context, &program, nullptr, 1,
|
||||||
|
&sample_mem_obj_size_test_kernel, nullptr);
|
||||||
|
test_error(error, "Unable to build test program");
|
||||||
|
|
||||||
|
mem_obj_arg_kernel = clCreateKernel(program, "mem_obj_size_test", &error);
|
||||||
|
test_error(error,
|
||||||
|
"Unable to get mem_obj_size_test kernel for built program");
|
||||||
|
|
||||||
|
std::vector<cl_uchar> mem_data(256, 0);
|
||||||
|
clMemWrapper buffer = clCreateBuffer(
|
||||||
|
context, CL_MEM_USE_HOST_PTR, mem_data.size(), mem_data.data(), &error);
|
||||||
|
test_error(error, "clCreateBuffer failed");
|
||||||
|
|
||||||
|
// Run the test - CL_INVALID_ARG_SIZE
|
||||||
|
error = clSetKernelArg(mem_obj_arg_kernel, 0, sizeof(cl_mem) * 2, &buffer);
|
||||||
|
test_failure_error_ret(
|
||||||
|
error, CL_INVALID_ARG_SIZE,
|
||||||
|
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
|
||||||
|
"argument is a memory object and arg_size > sizeof(cl_mem)",
|
||||||
|
TEST_FAIL);
|
||||||
|
|
||||||
|
error = clSetKernelArg(mem_obj_arg_kernel, 0, sizeof(cl_mem) / 2, &buffer);
|
||||||
|
test_failure_error_ret(
|
||||||
|
error, CL_INVALID_ARG_SIZE,
|
||||||
|
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
|
||||||
|
"argument is a memory object and arg_size < sizeof(cl_mem)",
|
||||||
|
TEST_FAIL);
|
||||||
|
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(negative_invalid_kernel)
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
clKernelWrapper kernel;
|
||||||
|
|
||||||
|
clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||||
|
sizeof(cl_float), NULL, &error);
|
||||||
|
test_error(error, "clCreateBuffer failed");
|
||||||
|
|
||||||
|
// Run the test - CL_INVALID_KERNEL
|
||||||
|
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem);
|
||||||
|
test_failure_error_ret(
|
||||||
|
error, CL_INVALID_KERNEL,
|
||||||
|
"clSetKernelArg is supposed to fail with CL_INVALID_KERNEL when kernel "
|
||||||
|
"is not a valid kernel object",
|
||||||
|
TEST_FAIL);
|
||||||
|
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(negative_invalid_arg_index)
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
clProgramWrapper program;
|
||||||
|
clKernelWrapper kernel;
|
||||||
|
|
||||||
|
// Setup the test
|
||||||
|
error = create_single_kernel_helper(context, &program, nullptr, 1,
|
||||||
|
sample_single_test_kernel, nullptr);
|
||||||
|
test_error(error, "Unable to build test program");
|
||||||
|
|
||||||
|
kernel = clCreateKernel(program, "sample_test", &error);
|
||||||
|
test_error(error, "Unable to get sample_test kernel for built program");
|
||||||
|
|
||||||
|
// Run the test - 2 index is out or range - expected CL_INVALID_ARG_INDEX
|
||||||
|
error = clSetKernelArg(kernel, 2, sizeof(cl_mem), nullptr);
|
||||||
|
test_failure_error_ret(
|
||||||
|
error, CL_INVALID_ARG_INDEX,
|
||||||
|
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_INDEX when "
|
||||||
|
"arg_index is not a valid argument index",
|
||||||
|
TEST_FAIL);
|
||||||
|
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(negative_invalid_arg_size_local)
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
clProgramWrapper program;
|
||||||
|
clKernelWrapper local_arg_kernel;
|
||||||
|
|
||||||
|
// Setup the test
|
||||||
|
error = create_single_kernel_helper(
|
||||||
|
context, &program, nullptr, 1, &sample_local_size_test_kernel, nullptr);
|
||||||
|
test_error(error, "Unable to build test program");
|
||||||
|
|
||||||
|
local_arg_kernel = clCreateKernel(program, "local_size_test", &error);
|
||||||
|
test_error(error, "Unable to get local_size_test kernel for built program");
|
||||||
|
|
||||||
|
// Run the test
|
||||||
|
error = clSetKernelArg(local_arg_kernel, 0, 0, nullptr);
|
||||||
|
test_failure_error_ret(
|
||||||
|
error, CL_INVALID_ARG_SIZE,
|
||||||
|
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when 0 is "
|
||||||
|
"passed to a local qualifier kernel argument",
|
||||||
|
TEST_FAIL);
|
||||||
|
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(negative_set_read_write_image_arg)
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
clProgramWrapper program;
|
||||||
|
clKernelWrapper write_image_kernel, read_image_kernel;
|
||||||
|
clMemWrapper write_only_image, read_only_image;
|
||||||
|
const char *test_kernels[2] = { sample_read_only_image_test_kernel,
|
||||||
|
sample_write_only_image_test_kernel };
|
||||||
|
constexpr cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 };
|
||||||
|
const int size_dim = 128;
|
||||||
|
|
||||||
|
PASSIVE_REQUIRE_IMAGE_SUPPORT(device);
|
||||||
|
|
||||||
|
// Setup the test
|
||||||
|
error = create_single_kernel_helper(context, &program, nullptr, 2,
|
||||||
|
test_kernels, nullptr);
|
||||||
|
test_error(error, "Unable to build test program");
|
||||||
|
|
||||||
|
read_image_kernel = clCreateKernel(program, "read_only_image_test", &error);
|
||||||
|
test_error(error,
|
||||||
|
"Unable to get read_only_image_test kernel for built program");
|
||||||
|
|
||||||
|
write_image_kernel =
|
||||||
|
clCreateKernel(program, "write_only_image_test", &error);
|
||||||
|
test_error(error,
|
||||||
|
"Unable to get write_only_image_test kernel for built program");
|
||||||
|
|
||||||
|
read_only_image = create_image_2d(context, CL_MEM_READ_ONLY, &format,
|
||||||
|
size_dim, size_dim, 0, nullptr, &error);
|
||||||
|
test_error(error, "create_image_2d failed");
|
||||||
|
|
||||||
|
write_only_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &format,
|
||||||
|
size_dim, size_dim, 0, nullptr, &error);
|
||||||
|
test_error(error, "create_image_2d failed");
|
||||||
|
|
||||||
|
// Run the test
|
||||||
|
error = clSetKernelArg(read_image_kernel, 0, sizeof(read_only_image),
|
||||||
|
&read_only_image);
|
||||||
|
test_failure_error_ret(error, CL_INVALID_ARG_VALUE,
|
||||||
|
"clSetKernelArg is supposed to fail "
|
||||||
|
"with CL_INVALID_ARG_VALUE when an image is "
|
||||||
|
"created with CL_MEM_READ_ONLY is "
|
||||||
|
"passed to a write_only kernel argument",
|
||||||
|
TEST_FAIL);
|
||||||
|
|
||||||
|
error = clSetKernelArg(write_image_kernel, 0, sizeof(write_only_image),
|
||||||
|
&write_only_image);
|
||||||
|
test_failure_error_ret(error, CL_INVALID_ARG_VALUE,
|
||||||
|
"clSetKernelArg is supposed to fail "
|
||||||
|
"with CL_INVALID_ARG_VALUE when an image is "
|
||||||
|
"created with CL_MEM_WRITE_ONLY is "
|
||||||
|
"passed to a read_only kernel argument",
|
||||||
|
TEST_FAIL);
|
||||||
|
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
|
|||||||
767
test_conformance/api/test_spirv_queries.cpp
Normal file
767
test_conformance/api/test_spirv_queries.cpp
Normal file
@@ -0,0 +1,767 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2025 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
#include "testBase.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#define SPV_ENABLE_UTILITY_CODE
|
||||||
|
#include <spirv/unified1/spirv.hpp>
|
||||||
|
|
||||||
|
static bool is_spirv_version_supported(cl_device_id deviceID,
|
||||||
|
const std::string& version)
|
||||||
|
{
|
||||||
|
std::string ilVersions = get_device_il_version_string(deviceID);
|
||||||
|
return ilVersions.find(version) != std::string::npos;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int doQueries(cl_device_id device,
|
||||||
|
std::vector<const char*>& extendedInstructionSets,
|
||||||
|
std::vector<const char*>& extensions,
|
||||||
|
std::vector<cl_uint>& capabilities)
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
|
||||||
|
size_t size = 0;
|
||||||
|
error =
|
||||||
|
clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR,
|
||||||
|
0, nullptr, &size);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for "
|
||||||
|
"CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR size\n");
|
||||||
|
|
||||||
|
extendedInstructionSets.resize(size / sizeof(const char*));
|
||||||
|
error =
|
||||||
|
clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR,
|
||||||
|
size, extendedInstructionSets.data(), nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for "
|
||||||
|
"CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR\n");
|
||||||
|
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, 0, nullptr,
|
||||||
|
&size);
|
||||||
|
test_error(
|
||||||
|
error,
|
||||||
|
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR size\n");
|
||||||
|
|
||||||
|
extensions.resize(size / sizeof(const char*));
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, size,
|
||||||
|
extensions.data(), nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR\n");
|
||||||
|
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, 0,
|
||||||
|
nullptr, &size);
|
||||||
|
test_error(
|
||||||
|
error,
|
||||||
|
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR size\n");
|
||||||
|
|
||||||
|
capabilities.resize(size / sizeof(cl_uint));
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, size,
|
||||||
|
capabilities.data(), nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR\n");
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int findRequirements(cl_device_id device,
|
||||||
|
std::vector<const char*>& extendedInstructionSets,
|
||||||
|
std::vector<const char*>& extensions,
|
||||||
|
std::vector<cl_uint>& capabilities)
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
|
||||||
|
auto version = get_device_cl_version(device);
|
||||||
|
auto ilVersions = get_device_il_version_string(device);
|
||||||
|
|
||||||
|
// If no SPIR-V versions are supported, there are no requirements.
|
||||||
|
if (ilVersions.find("SPIR-V") == std::string::npos)
|
||||||
|
{
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_bool deviceImageSupport = CL_FALSE;
|
||||||
|
cl_bool deviceReadWriteImageSupport = CL_FALSE;
|
||||||
|
cl_bool deviceSubGroupsSupport = CL_FALSE;
|
||||||
|
cl_bool deviceGenericAddressSpaceSupport = CL_FALSE;
|
||||||
|
cl_bool deviceWorkGroupCollectiveFunctionsSupport = CL_FALSE;
|
||||||
|
cl_bool devicePipeSupport = CL_FALSE;
|
||||||
|
cl_bool deviceDeviceEnqueueSupport = CL_FALSE;
|
||||||
|
cl_device_integer_dot_product_capabilities_khr
|
||||||
|
deviceIntegerDotProductCapabilities = 0;
|
||||||
|
cl_device_fp_atomic_capabilities_ext deviceFp32AtomicCapabilities = 0;
|
||||||
|
cl_device_fp_atomic_capabilities_ext deviceFp16AtomicCapabilities = 0;
|
||||||
|
cl_device_fp_atomic_capabilities_ext deviceFp64AtomicCapabilities = 0;
|
||||||
|
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
|
||||||
|
sizeof(deviceImageSupport), &deviceImageSupport,
|
||||||
|
nullptr);
|
||||||
|
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE_SUPPORT\n");
|
||||||
|
|
||||||
|
if (version >= Version(2, 0))
|
||||||
|
{
|
||||||
|
cl_uint deviceMaxReadWriteImageArgs = 0;
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
|
||||||
|
sizeof(deviceMaxReadWriteImageArgs),
|
||||||
|
&deviceMaxReadWriteImageArgs, nullptr);
|
||||||
|
test_error(
|
||||||
|
error,
|
||||||
|
"clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
|
||||||
|
|
||||||
|
deviceReadWriteImageSupport =
|
||||||
|
deviceMaxReadWriteImageArgs != 0 ? CL_TRUE : CL_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (version >= Version(2, 1))
|
||||||
|
{
|
||||||
|
cl_uint deviceMaxNumSubGroups = 0;
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS,
|
||||||
|
sizeof(deviceMaxNumSubGroups),
|
||||||
|
&deviceMaxNumSubGroups, nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for CL_DEVICE_MAX_NUM_SUB_GROUPS\n");
|
||||||
|
|
||||||
|
deviceSubGroupsSupport =
|
||||||
|
deviceMaxNumSubGroups != 0 ? CL_TRUE : CL_FALSE;
|
||||||
|
}
|
||||||
|
else if (is_extension_available(device, "cl_khr_subgroups"))
|
||||||
|
{
|
||||||
|
deviceSubGroupsSupport = CL_TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (version >= Version(3, 0))
|
||||||
|
{
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT,
|
||||||
|
sizeof(deviceGenericAddressSpaceSupport),
|
||||||
|
&deviceGenericAddressSpaceSupport, nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for "
|
||||||
|
"CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT\n");
|
||||||
|
|
||||||
|
error = clGetDeviceInfo(
|
||||||
|
device, CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT,
|
||||||
|
sizeof(deviceWorkGroupCollectiveFunctionsSupport),
|
||||||
|
&deviceWorkGroupCollectiveFunctionsSupport, nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for "
|
||||||
|
"CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT\n");
|
||||||
|
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_PIPE_SUPPORT,
|
||||||
|
sizeof(devicePipeSupport), &devicePipeSupport,
|
||||||
|
nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for CL_DEVICE_PIPE_SUPPORT\n");
|
||||||
|
|
||||||
|
cl_device_device_enqueue_capabilities deviceDeviceEnqueueCapabilities =
|
||||||
|
0;
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
|
||||||
|
sizeof(deviceDeviceEnqueueCapabilities),
|
||||||
|
&deviceDeviceEnqueueCapabilities, nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for "
|
||||||
|
"CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES\n");
|
||||||
|
|
||||||
|
deviceDeviceEnqueueSupport =
|
||||||
|
deviceDeviceEnqueueCapabilities != 0 ? CL_TRUE : CL_FALSE;
|
||||||
|
}
|
||||||
|
else if (version >= Version(2, 0))
|
||||||
|
{
|
||||||
|
deviceGenericAddressSpaceSupport = CL_TRUE;
|
||||||
|
deviceWorkGroupCollectiveFunctionsSupport = CL_TRUE;
|
||||||
|
devicePipeSupport = CL_TRUE;
|
||||||
|
deviceDeviceEnqueueSupport = CL_TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_extension_available(device, "cl_khr_integer_dot_product"))
|
||||||
|
{
|
||||||
|
error = clGetDeviceInfo(device,
|
||||||
|
CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR,
|
||||||
|
sizeof(deviceIntegerDotProductCapabilities),
|
||||||
|
&deviceIntegerDotProductCapabilities, nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for "
|
||||||
|
"CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_extension_available(device, "cl_ext_float_atomics"))
|
||||||
|
{
|
||||||
|
error =
|
||||||
|
clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||||
|
sizeof(deviceFp32AtomicCapabilities),
|
||||||
|
&deviceFp32AtomicCapabilities, nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for "
|
||||||
|
"CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT\n");
|
||||||
|
|
||||||
|
error =
|
||||||
|
clGetDeviceInfo(device, CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT,
|
||||||
|
sizeof(deviceFp16AtomicCapabilities),
|
||||||
|
&deviceFp16AtomicCapabilities, nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for "
|
||||||
|
"CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT\n");
|
||||||
|
|
||||||
|
error =
|
||||||
|
clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||||
|
sizeof(deviceFp64AtomicCapabilities),
|
||||||
|
&deviceFp64AtomicCapabilities, nullptr);
|
||||||
|
test_error(error,
|
||||||
|
"clGetDeviceInfo failed for "
|
||||||
|
"CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required.
|
||||||
|
extendedInstructionSets.push_back("OpenCL.std");
|
||||||
|
|
||||||
|
capabilities.push_back(spv::CapabilityAddresses);
|
||||||
|
capabilities.push_back(spv::CapabilityFloat16Buffer);
|
||||||
|
capabilities.push_back(spv::CapabilityInt16);
|
||||||
|
capabilities.push_back(spv::CapabilityInt8);
|
||||||
|
capabilities.push_back(spv::CapabilityKernel);
|
||||||
|
capabilities.push_back(spv::CapabilityLinkage);
|
||||||
|
capabilities.push_back(spv::CapabilityVector16);
|
||||||
|
|
||||||
|
// Required for FULL_PROFILE devices, or devices supporting
|
||||||
|
// cles_khr_int64.
|
||||||
|
if (gHasLong)
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityInt64);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting images.
|
||||||
|
if (deviceImageSupport == CL_TRUE)
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityImage1D);
|
||||||
|
capabilities.push_back(spv::CapabilityImageBasic);
|
||||||
|
capabilities.push_back(spv::CapabilityImageBuffer);
|
||||||
|
capabilities.push_back(spv::CapabilityLiteralSampler);
|
||||||
|
capabilities.push_back(spv::CapabilitySampled1D);
|
||||||
|
capabilities.push_back(spv::CapabilitySampledBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting SPIR-V 1.6.
|
||||||
|
if (ilVersions.find("SPIR-V_1.6") != std::string::npos)
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityUniformDecoration);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting read-write images.
|
||||||
|
if (deviceReadWriteImageSupport == CL_TRUE)
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityImageReadWrite);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting the generic address space.
|
||||||
|
if (deviceGenericAddressSpaceSupport == CL_TRUE)
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityGenericPointer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting sub-groups or work-group collective
|
||||||
|
// functions.
|
||||||
|
if (deviceSubGroupsSupport == CL_TRUE
|
||||||
|
|| deviceWorkGroupCollectiveFunctionsSupport == CL_TRUE)
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityGroups);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting pipes.
|
||||||
|
if (devicePipeSupport == CL_TRUE)
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityPipes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting device-side enqueue.
|
||||||
|
if (deviceDeviceEnqueueSupport == CL_TRUE)
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityDeviceEnqueue);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting SPIR-V 1.1 and OpenCL 2.2.
|
||||||
|
if (ilVersions.find("SPIR-V_1.1") != std::string::npos
|
||||||
|
&& version == Version(2, 2))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityPipeStorage);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting SPIR-V 1.1 and either OpenCL 2.2 or
|
||||||
|
// OpenCL 3.0 devices supporting sub-groups.
|
||||||
|
if (ilVersions.find("SPIR-V_1.1") != std::string::npos
|
||||||
|
&& (version == Version(2, 2)
|
||||||
|
|| (version >= Version(3, 0) && deviceSubGroupsSupport == CL_TRUE)))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilitySubgroupDispatch);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_expect_assume.
|
||||||
|
if (is_extension_available(device, "cl_khr_expect_assume"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_KHR_expect_assume");
|
||||||
|
capabilities.push_back(spv::CapabilityExpectAssumeKHR);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_extended_bit_ops.
|
||||||
|
if (is_extension_available(device, "cl_khr_extended_bit_ops"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_KHR_bit_instructions");
|
||||||
|
capabilities.push_back(spv::CapabilityBitInstructions);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting half-precision floating-point
|
||||||
|
// (cl_khr_fp16).
|
||||||
|
if (is_extension_available(device, "cl_khr_fp16"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityFloat16);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting double-precision floating-point
|
||||||
|
// (cl_khr_fp64).
|
||||||
|
if (is_extension_available(device, "cl_khr_fp64"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityFloat64);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting 64-bit atomics
|
||||||
|
// (cl_khr_int64_base_atomics or cl_khr_int64_extended_atomics).
|
||||||
|
if (is_extension_available(device, "cl_khr_int64_base_atomics")
|
||||||
|
|| is_extension_available(device, "cl_khr_int64_extended_atomics"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityInt64Atomics);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_integer_dot_product.
|
||||||
|
if (is_extension_available(device, "cl_khr_integer_dot_product"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_KHR_integer_dot_product");
|
||||||
|
capabilities.push_back(spv::CapabilityDotProduct);
|
||||||
|
capabilities.push_back(spv::CapabilityDotProductInput4x8BitPacked);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_integer_dot_product and
|
||||||
|
// CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR.
|
||||||
|
if (is_extension_available(device, "cl_khr_integer_dot_product")
|
||||||
|
&& (deviceIntegerDotProductCapabilities
|
||||||
|
& CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityDotProductInput4x8Bit);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_kernel_clock.
|
||||||
|
if (is_extension_available(device, "cl_khr_kernel_clock"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_KHR_shader_clock");
|
||||||
|
capabilities.push_back(spv::CapabilityShaderClockKHR);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting both cl_khr_mipmap_image and
|
||||||
|
// cl_khr_mipmap_image_writes.
|
||||||
|
if (is_extension_available(device, "cl_khr_mipmap_image")
|
||||||
|
&& is_extension_available(device, "cl_khr_mipmap_image_writes"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityImageMipmap);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_spirv_extended_debug_info.
|
||||||
|
if (is_extension_available(device, "cl_khr_spirv_extended_debug_info"))
|
||||||
|
{
|
||||||
|
extendedInstructionSets.push_back("OpenCL.DebugInfo.100");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_spirv_linkonce_odr.
|
||||||
|
if (is_extension_available(device, "cl_khr_spirv_linkonce_odr"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_KHR_linkonce_odr");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting
|
||||||
|
// cl_khr_spirv_no_integer_wrap_decoration.
|
||||||
|
if (is_extension_available(device,
|
||||||
|
"cl_khr_spirv_no_integer_wrap_decoration"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_KHR_no_integer_wrap_decoration");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_subgroup_ballot.
|
||||||
|
if (is_extension_available(device, "cl_khr_subgroup_ballot"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityGroupNonUniformBallot);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_subgroup_clustered_reduce.
|
||||||
|
if (is_extension_available(device, "cl_khr_subgroup_clustered_reduce"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityGroupNonUniformClustered);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_subgroup_named_barrier.
|
||||||
|
if (is_extension_available(device, "cl_khr_subgroup_named_barrier"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityNamedBarrier);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting
|
||||||
|
// cl_khr_subgroup_non_uniform_arithmetic.
|
||||||
|
if (is_extension_available(device,
|
||||||
|
"cl_khr_subgroup_non_uniform_arithmetic"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityGroupNonUniformArithmetic);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_subgroup_non_uniform_vote.
|
||||||
|
if (is_extension_available(device, "cl_khr_subgroup_non_uniform_vote"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityGroupNonUniform);
|
||||||
|
capabilities.push_back(spv::CapabilityGroupNonUniformVote);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_subgroup_rotate.
|
||||||
|
if (is_extension_available(device, "cl_khr_subgroup_rotate"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_KHR_subgroup_rotate");
|
||||||
|
capabilities.push_back(spv::CapabilityGroupNonUniformRotateKHR);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_subgroup_shuffle.
|
||||||
|
if (is_extension_available(device, "cl_khr_subgroup_shuffle"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityGroupNonUniformShuffle);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_subgroup_shuffle_relative.
|
||||||
|
if (is_extension_available(device, "cl_khr_subgroup_shuffle_relative"))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityGroupNonUniformShuffleRelative);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_khr_work_group_uniform_arithmetic.
|
||||||
|
if (is_extension_available(device, "cl_khr_work_group_uniform_arithmetic"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_KHR_uniform_group_instructions");
|
||||||
|
capabilities.push_back(spv::CapabilityGroupUniformArithmeticKHR);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_ext_float_atomics and fp32 atomic
|
||||||
|
// adds.
|
||||||
|
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||||
|
&& (deviceFp32AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityAtomicFloat32AddEXT);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_ext_float_atomics and fp32 atomic
|
||||||
|
// min and max.
|
||||||
|
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||||
|
&& (deviceFp32AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityAtomicFloat32MinMaxEXT);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_ext_float_atomics and fp16 atomic
|
||||||
|
// adds.
|
||||||
|
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||||
|
&& (deviceFp16AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_EXT_shader_atomic_float16_add");
|
||||||
|
capabilities.push_back(spv::CapabilityAtomicFloat16AddEXT);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_ext_float_atomics and fp16 atomic
|
||||||
|
// min and max.
|
||||||
|
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||||
|
&& (deviceFp16AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityAtomicFloat16MinMaxEXT);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_ext_float_atomics and fp64 atomic
|
||||||
|
// adds.
|
||||||
|
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||||
|
&& (deviceFp64AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityAtomicFloat64AddEXT);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_ext_float_atomics and fp64 atomic
|
||||||
|
// min and max.
|
||||||
|
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||||
|
&& (deviceFp64AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
|
||||||
|
{
|
||||||
|
capabilities.push_back(spv::CapabilityAtomicFloat64MinMaxEXT);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_ext_float_atomics and fp16, fp32,
|
||||||
|
// or fp64 atomic min or max.
|
||||||
|
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||||
|
&& ((deviceFp32AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))
|
||||||
|
|| (deviceFp16AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))
|
||||||
|
|| (deviceFp64AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_EXT_shader_atomic_float_min_max");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_ext_float_atomics and fp32 or fp64
|
||||||
|
// atomic adds.
|
||||||
|
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||||
|
&& ((deviceFp32AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))
|
||||||
|
|| (deviceFp64AtomicCapabilities
|
||||||
|
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
|
||||||
|
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_EXT_shader_atomic_float_add");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_intel_bfloat16_conversions.
|
||||||
|
if (is_extension_available(device, "cl_intel_bfloat16_conversions"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_INTEL_bfloat16_conversion");
|
||||||
|
capabilities.push_back(spv::CapabilityBFloat16ConversionINTEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting
|
||||||
|
// cl_intel_spirv_device_side_avc_motion_estimation.
|
||||||
|
if (is_extension_available(
|
||||||
|
device, "cl_intel_spirv_device_side_avc_motion_estimation"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_INTEL_device_side_avc_motion_estimation");
|
||||||
|
capabilities.push_back(
|
||||||
|
spv::CapabilitySubgroupAvcMotionEstimationChromaINTEL);
|
||||||
|
capabilities.push_back(spv::CapabilitySubgroupAvcMotionEstimationINTEL);
|
||||||
|
capabilities.push_back(
|
||||||
|
spv::CapabilitySubgroupAvcMotionEstimationIntraINTEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_intel_spirv_media_block_io.
|
||||||
|
if (is_extension_available(device, "cl_intel_spirv_media_block_io"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_INTEL_media_block_io");
|
||||||
|
capabilities.push_back(spv::CapabilitySubgroupImageMediaBlockIOINTEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_intel_spirv_subgroups.
|
||||||
|
if (is_extension_available(device, "cl_intel_spirv_subgroups"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_INTEL_subgroups");
|
||||||
|
capabilities.push_back(spv::CapabilitySubgroupBufferBlockIOINTEL);
|
||||||
|
capabilities.push_back(spv::CapabilitySubgroupImageBlockIOINTEL);
|
||||||
|
capabilities.push_back(spv::CapabilitySubgroupShuffleINTEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_intel_split_work_group_barrier.
|
||||||
|
if (is_extension_available(device, "cl_intel_split_work_group_barrier"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_INTEL_split_barrier");
|
||||||
|
capabilities.push_back(spv::CapabilitySplitBarrierINTEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Required for devices supporting cl_intel_subgroup_buffer_prefetch.
|
||||||
|
if (is_extension_available(device, "cl_intel_subgroup_buffer_prefetch"))
|
||||||
|
{
|
||||||
|
extensions.push_back("SPV_INTEL_subgroup_buffer_prefetch");
|
||||||
|
capabilities.push_back(spv::CapabilitySubgroupBufferPrefetchINTEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(spirv_query_requirements)
|
||||||
|
{
|
||||||
|
if (!is_extension_available(device, "cl_khr_spirv_queries"))
|
||||||
|
{
|
||||||
|
log_info("cl_khr_spirv_queries is not supported; skipping test.\n");
|
||||||
|
return TEST_SKIPPED_ITSELF;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_int error;
|
||||||
|
|
||||||
|
std::vector<const char*> queriedExtendedInstructionSets;
|
||||||
|
std::vector<const char*> queriedExtensions;
|
||||||
|
std::vector<cl_uint> queriedCapabilities;
|
||||||
|
|
||||||
|
error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions,
|
||||||
|
queriedCapabilities);
|
||||||
|
test_error_fail(error, "Unable to perform SPIR-V queries");
|
||||||
|
|
||||||
|
std::vector<const char*> requiredExtendedInstructionSets;
|
||||||
|
std::vector<const char*> requiredExtensions;
|
||||||
|
std::vector<cl_uint> requiredCapabilities;
|
||||||
|
error = findRequirements(device, requiredExtendedInstructionSets,
|
||||||
|
requiredExtensions, requiredCapabilities);
|
||||||
|
test_error_fail(error, "Unable to find SPIR-V requirements");
|
||||||
|
|
||||||
|
for (auto check : requiredExtendedInstructionSets)
|
||||||
|
{
|
||||||
|
auto cmp = [=](const char* queried) {
|
||||||
|
return strcmp(check, queried) == 0;
|
||||||
|
};
|
||||||
|
auto it = std::find_if(queriedExtendedInstructionSets.begin(),
|
||||||
|
queriedExtendedInstructionSets.end(), cmp);
|
||||||
|
if (it == queriedExtendedInstructionSets.end())
|
||||||
|
{
|
||||||
|
test_fail("Missing required extended instruction set: %s\n", check);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto check : requiredExtensions)
|
||||||
|
{
|
||||||
|
auto cmp = [=](const char* queried) {
|
||||||
|
return strcmp(check, queried) == 0;
|
||||||
|
};
|
||||||
|
auto it = std::find_if(queriedExtensions.begin(),
|
||||||
|
queriedExtensions.end(), cmp);
|
||||||
|
if (it == queriedExtensions.end())
|
||||||
|
{
|
||||||
|
test_fail("Missing required extension: %s\n", check);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto check : requiredCapabilities)
|
||||||
|
{
|
||||||
|
if (std::find(queriedCapabilities.begin(), queriedCapabilities.end(),
|
||||||
|
check)
|
||||||
|
== queriedCapabilities.end())
|
||||||
|
{
|
||||||
|
test_fail(
|
||||||
|
"Missing required capability: %s\n",
|
||||||
|
spv::CapabilityToString(static_cast<spv::Capability>(check)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find any extraneous capabilities (informational):
|
||||||
|
for (auto check : queriedCapabilities)
|
||||||
|
{
|
||||||
|
if (std::find(requiredCapabilities.begin(), requiredCapabilities.end(),
|
||||||
|
check)
|
||||||
|
== requiredCapabilities.end())
|
||||||
|
{
|
||||||
|
log_info(
|
||||||
|
"Found non-required capability: %s\n",
|
||||||
|
spv::CapabilityToString(static_cast<spv::Capability>(check)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(spirv_query_dependencies)
|
||||||
|
{
|
||||||
|
if (!is_extension_available(device, "cl_khr_spirv_queries"))
|
||||||
|
{
|
||||||
|
log_info("cl_khr_spirv_queries is not supported; skipping test.\n");
|
||||||
|
return TEST_SKIPPED_ITSELF;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_int error;
|
||||||
|
|
||||||
|
std::vector<const char*> queriedExtendedInstructionSets;
|
||||||
|
std::vector<const char*> queriedExtensions;
|
||||||
|
std::vector<cl_uint> queriedCapabilities;
|
||||||
|
|
||||||
|
error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions,
|
||||||
|
queriedCapabilities);
|
||||||
|
test_error_fail(error, "Unable to perform SPIR-V queries");
|
||||||
|
|
||||||
|
struct CapabilityDependencies
|
||||||
|
{
|
||||||
|
std::vector<std::string> extensions;
|
||||||
|
std::string version;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::map<spv::Capability, CapabilityDependencies> dependencies;
|
||||||
|
|
||||||
|
#define SPIRV_CAPABILITY_VERSION_DEPENDENCY(_cap, _ver) \
|
||||||
|
dependencies[spv::Capability##_cap].version = _ver;
|
||||||
|
#define SPIRV_CAPABILITY_EXTENSION_DEPENDENCY(_cap, _ext) \
|
||||||
|
dependencies[spv::Capability##_cap].extensions.push_back(_ext);
|
||||||
|
#include "spirv_capability_deps.def"
|
||||||
|
|
||||||
|
// For each queried SPIR-V capability, ensure that either that any SPIR-V
|
||||||
|
// version dependencies or SPIR-V extension dependencies are satisfied.
|
||||||
|
|
||||||
|
for (auto check : queriedCapabilities)
|
||||||
|
{
|
||||||
|
// Log and skip any unknown capabilities
|
||||||
|
auto it = dependencies.find(static_cast<spv::Capability>(check));
|
||||||
|
if (it == dependencies.end())
|
||||||
|
{
|
||||||
|
log_info(
|
||||||
|
"No known dependencies for queried capability %s!\n",
|
||||||
|
spv::CapabilityToString(static_cast<spv::Capability>(check)));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if a SPIR-V version dependency is satisfied
|
||||||
|
const auto& version_dep = it->second.version;
|
||||||
|
if (!version_dep.empty()
|
||||||
|
&& is_spirv_version_supported(device, version_dep))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if a SPIR-V extension dependency is satisfied
|
||||||
|
bool found = false;
|
||||||
|
for (const auto& extension_dep : it->second.extensions)
|
||||||
|
{
|
||||||
|
if (std::find(queriedExtensions.begin(), queriedExtensions.end(),
|
||||||
|
extension_dep)
|
||||||
|
!= queriedExtensions.end())
|
||||||
|
{
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (found)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we get here then the capability has an unsatisfied dependency.
|
||||||
|
log_error("Couldn't find a dependency for queried capability %s!\n",
|
||||||
|
spv::CapabilityToString(static_cast<spv::Capability>(check)));
|
||||||
|
if (!version_dep.empty())
|
||||||
|
{
|
||||||
|
log_error("Checked for SPIR-V version %s.\n", version_dep.c_str());
|
||||||
|
}
|
||||||
|
for (const auto& extension_dep : it->second.extensions)
|
||||||
|
{
|
||||||
|
log_error("Checked for SPIR-V extension %s.n",
|
||||||
|
extension_dep.c_str());
|
||||||
|
}
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
@@ -108,7 +108,11 @@ REGISTER_TEST_VERSION(sub_group_dispatch, Version(2, 1))
|
|||||||
nullptr);
|
nullptr);
|
||||||
test_error(error, "clGetDeviceInfo failed");
|
test_error(error, "clGetDeviceInfo failed");
|
||||||
|
|
||||||
max_local = max_work_item_sizes[0];
|
error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
|
||||||
|
sizeof(max_local), &max_local, nullptr);
|
||||||
|
test_error(error, "clGetKernelWorkGroupInfo failed");
|
||||||
|
|
||||||
|
max_local = std::min(max_local, max_work_item_sizes[0]);
|
||||||
|
|
||||||
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
|
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
|
||||||
(void *)&platform, NULL);
|
(void *)&platform, NULL);
|
||||||
|
|||||||
@@ -1,9 +1,5 @@
|
|||||||
set(MODULE_NAME BASIC)
|
set(MODULE_NAME BASIC)
|
||||||
|
|
||||||
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
|
|
||||||
add_cxx_flag_if_supported(-Wno-narrowing)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(${MODULE_NAME}_SOURCES
|
set(${MODULE_NAME}_SOURCES
|
||||||
main.cpp
|
main.cpp
|
||||||
test_fpmath.cpp
|
test_fpmath.cpp
|
||||||
|
|||||||
@@ -161,7 +161,7 @@ REGISTER_TEST(arraycopy)
|
|||||||
err |= clSetKernelArg(kernel, 1, sizeof results, &results);
|
err |= clSetKernelArg(kernel, 1, sizeof results, &results);
|
||||||
test_error(err, "clSetKernelArg failed");
|
test_error(err, "clSetKernelArg failed");
|
||||||
|
|
||||||
size_t threads[3] = { num_elements, 0, 0 };
|
size_t threads[3] = { static_cast<size_t>(num_elements), 0, 0 };
|
||||||
|
|
||||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||||
|
|||||||
@@ -35,9 +35,9 @@ static int test_arrayimagecopy_single_format(
|
|||||||
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
|
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
|
||||||
imgptr{ nullptr, free };
|
imgptr{ nullptr, free };
|
||||||
clMemWrapper buffer, image;
|
clMemWrapper buffer, image;
|
||||||
int img_width = 512;
|
size_t img_width = 512;
|
||||||
int img_height = 512;
|
size_t img_height = 512;
|
||||||
int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
|
size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
|
||||||
size_t elem_size;
|
size_t elem_size;
|
||||||
size_t buffer_size;
|
size_t buffer_size;
|
||||||
cl_int err;
|
cl_int err;
|
||||||
|
|||||||
@@ -14,7 +14,6 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
//
|
//
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
using std::isnan;
|
|
||||||
#include "harness/compat.h"
|
#include "harness/compat.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@@ -26,6 +25,7 @@ using std::isnan;
|
|||||||
|
|
||||||
#include <CL/cl_half.h>
|
#include <CL/cl_half.h>
|
||||||
#include "harness/conversions.h"
|
#include "harness/conversions.h"
|
||||||
|
#include "harness/mathHelpers.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
|
|
||||||
extern cl_half_rounding_mode halfRoundingMode;
|
extern cl_half_rounding_mode halfRoundingMode;
|
||||||
@@ -102,16 +102,6 @@ const char * kernel_explicit_s2v_set[NUM_VEC_TYPES][NUM_VEC_TYPES][5] = {
|
|||||||
|
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
bool IsHalfNaN(cl_half v)
|
|
||||||
{
|
|
||||||
// Extract FP16 exponent and mantissa
|
|
||||||
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
|
||||||
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
|
||||||
|
|
||||||
// NaN test
|
|
||||||
return (h_exp == 0x1F && h_mant != 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int test_explicit_s2v_function(cl_context context,
|
static int test_explicit_s2v_function(cl_context context,
|
||||||
cl_command_queue queue, cl_kernel kernel,
|
cl_command_queue queue, cl_kernel kernel,
|
||||||
ExplicitType srcType, unsigned int count,
|
ExplicitType srcType, unsigned int count,
|
||||||
@@ -183,19 +173,20 @@ static int test_explicit_s2v_function(cl_context context,
|
|||||||
{
|
{
|
||||||
bool isSrcNaN =
|
bool isSrcNaN =
|
||||||
(((srcType == kHalf)
|
(((srcType == kHalf)
|
||||||
&& IsHalfNaN(*reinterpret_cast<cl_half *>(inPtr)))
|
&& isnan_fp(*reinterpret_cast<cl_half *>(inPtr)))
|
||||||
|| ((srcType == kFloat)
|
|| ((srcType == kFloat)
|
||||||
&& isnan(*reinterpret_cast<cl_float *>(inPtr)))
|
&& isnan_fp(*reinterpret_cast<cl_float *>(inPtr)))
|
||||||
|| ((srcType == kDouble)
|
|| ((srcType == kDouble)
|
||||||
&& isnan(*reinterpret_cast<cl_double *>(inPtr))));
|
&& isnan_fp(*reinterpret_cast<cl_double *>(inPtr))));
|
||||||
bool isDestNaN = (((destType == kHalf)
|
bool isDestNaN =
|
||||||
&& IsHalfNaN(*reinterpret_cast<cl_half *>(
|
(((destType == kHalf)
|
||||||
|
&& isnan_fp(*reinterpret_cast<cl_half *>(
|
||||||
outPtr + destTypeSize * s)))
|
outPtr + destTypeSize * s)))
|
||||||
|| ((destType == kFloat)
|
|| ((destType == kFloat)
|
||||||
&& isnan(*reinterpret_cast<cl_float *>(
|
&& isnan_fp(*reinterpret_cast<cl_float *>(
|
||||||
outPtr + destTypeSize * s)))
|
outPtr + destTypeSize * s)))
|
||||||
|| ((destType == kDouble)
|
|| ((destType == kDouble)
|
||||||
&& isnan(*reinterpret_cast<cl_double *>(
|
&& isnan_fp(*reinterpret_cast<cl_double *>(
|
||||||
outPtr + destTypeSize * s))));
|
outPtr + destTypeSize * s))));
|
||||||
|
|
||||||
if (isSrcNaN && isDestNaN)
|
if (isSrcNaN && isDestNaN)
|
||||||
|
|||||||
@@ -14,6 +14,7 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
//
|
//
|
||||||
#include "harness/compat.h"
|
#include "harness/compat.h"
|
||||||
|
#include "harness/mathHelpers.h"
|
||||||
#include "harness/rounding_mode.h"
|
#include "harness/rounding_mode.h"
|
||||||
#include "harness/stringHelpers.h"
|
#include "harness/stringHelpers.h"
|
||||||
|
|
||||||
@@ -57,16 +58,6 @@ template <typename T> double toDouble(T val)
|
|||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isHalfNan(cl_half v)
|
|
||||||
{
|
|
||||||
// Extract FP16 exponent and mantissa
|
|
||||||
uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
|
||||||
uint16_t h_mant = v & 0x3FF;
|
|
||||||
|
|
||||||
// NaN test
|
|
||||||
return (h_exp == 0x1F && h_mant != 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_half half_plus(cl_half a, cl_half b)
|
cl_half half_plus(cl_half a, cl_half b)
|
||||||
{
|
{
|
||||||
return HFF(std::plus<float>()(HTF(a), HTF(b)));
|
return HFF(std::plus<float>()(HTF(a), HTF(b)));
|
||||||
@@ -101,14 +92,7 @@ int verify_fp(std::vector<T> (&input)[2], std::vector<T> &output,
|
|||||||
T r = test.ref(inA[i], inB[i]);
|
T r = test.ref(inA[i], inB[i]);
|
||||||
bool both_nan = false;
|
bool both_nan = false;
|
||||||
|
|
||||||
if (std::is_same<T, cl_half>::value)
|
both_nan = isnan_fp(r) && isnan_fp(output[i]);
|
||||||
{
|
|
||||||
both_nan = isHalfNan(r) && isHalfNan(output[i]);
|
|
||||||
}
|
|
||||||
else if (std::is_floating_point<T>::value)
|
|
||||||
{
|
|
||||||
both_nan = std::isnan(r) && std::isnan(output[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If not both nan, check if the result is the same
|
// If not both nan, check if the result is the same
|
||||||
if (!both_nan && (r != output[i]))
|
if (!both_nan && (r != output[i]))
|
||||||
|
|||||||
@@ -100,8 +100,8 @@ REGISTER_TEST(hostptr)
|
|||||||
cl_image_format img_format;
|
cl_image_format img_format;
|
||||||
cl_uchar *rgba8_inptr, *rgba8_outptr;
|
cl_uchar *rgba8_inptr, *rgba8_outptr;
|
||||||
void *lock_buffer;
|
void *lock_buffer;
|
||||||
int img_width = 512;
|
size_t img_width = 512;
|
||||||
int img_height = 512;
|
size_t img_height = 512;
|
||||||
cl_int err;
|
cl_int err;
|
||||||
MTdata d;
|
MTdata d;
|
||||||
RoundingMode oldRoundMode;
|
RoundingMode oldRoundMode;
|
||||||
|
|||||||
@@ -56,8 +56,9 @@ __kernel void test_if(__global int *src, __global int *dst)
|
|||||||
int verify_if(std::vector<cl_int> input, std::vector<cl_int> output)
|
int verify_if(std::vector<cl_int> input, std::vector<cl_int> output)
|
||||||
{
|
{
|
||||||
const cl_int results[] = {
|
const cl_int results[] = {
|
||||||
0x12345678, 0x23456781, 0x34567812, 0x45678123,
|
(cl_int)0x12345678, (cl_int)0x23456781, (cl_int)0x34567812,
|
||||||
0x56781234, 0x67812345, 0x78123456, 0x81234567,
|
(cl_int)0x45678123, (cl_int)0x56781234, (cl_int)0x67812345,
|
||||||
|
(cl_int)0x78123456, (cl_int)0x81234567,
|
||||||
};
|
};
|
||||||
|
|
||||||
auto predicate = [&results](cl_int a, cl_int b) {
|
auto predicate = [&results](cl_int a, cl_int b) {
|
||||||
|
|||||||
@@ -144,8 +144,8 @@ verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int
|
|||||||
|
|
||||||
REGISTER_TEST(image_multipass_integer_coord)
|
REGISTER_TEST(image_multipass_integer_coord)
|
||||||
{
|
{
|
||||||
int img_width = 512;
|
size_t img_width = 512;
|
||||||
int img_height = 512;
|
size_t img_height = 512;
|
||||||
cl_image_format img_format;
|
cl_image_format img_format;
|
||||||
|
|
||||||
int num_input_streams = 8;
|
int num_input_streams = 8;
|
||||||
@@ -397,8 +397,8 @@ REGISTER_TEST(image_multipass_integer_coord)
|
|||||||
|
|
||||||
REGISTER_TEST(image_multipass_float_coord)
|
REGISTER_TEST(image_multipass_float_coord)
|
||||||
{
|
{
|
||||||
int img_width = 512;
|
size_t img_width = 512;
|
||||||
int img_height = 512;
|
size_t img_height = 512;
|
||||||
cl_image_format img_format;
|
cl_image_format img_format;
|
||||||
|
|
||||||
int num_input_streams = 8;
|
int num_input_streams = 8;
|
||||||
|
|||||||
@@ -35,9 +35,9 @@ static int test_imagearraycopy_single_format(
|
|||||||
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
|
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
|
||||||
imgptr{ nullptr, free };
|
imgptr{ nullptr, free };
|
||||||
clMemWrapper buffer, image;
|
clMemWrapper buffer, image;
|
||||||
const int img_width = 512;
|
const size_t img_width = 512;
|
||||||
const int img_height = 512;
|
const size_t img_height = 512;
|
||||||
const int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
|
const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
|
||||||
size_t elem_size;
|
size_t elem_size;
|
||||||
size_t buffer_size;
|
size_t buffer_size;
|
||||||
cl_int err;
|
cl_int err;
|
||||||
|
|||||||
@@ -111,8 +111,8 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
|
|||||||
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
||||||
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
||||||
clMemWrapper streams[6];
|
clMemWrapper streams[6];
|
||||||
int img_width = 512;
|
size_t img_width = 512;
|
||||||
int img_height = 512;
|
size_t img_height = 512;
|
||||||
int i, err;
|
int i, err;
|
||||||
MTdataHolder d(gRandomSeed);
|
MTdataHolder d(gRandomSeed);
|
||||||
|
|
||||||
@@ -153,7 +153,7 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
|
|||||||
for (i = 0; i < 3; i++)
|
for (i = 0; i < 3; i++)
|
||||||
{
|
{
|
||||||
void *p, *outp;
|
void *p, *outp;
|
||||||
int x, y, delta_w = img_width / 8, delta_h = img_height / 16;
|
size_t x, y, delta_w = img_width / 8, delta_h = img_height / 16;
|
||||||
|
|
||||||
switch (i)
|
switch (i)
|
||||||
{
|
{
|
||||||
@@ -197,8 +197,9 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
|
|||||||
copy_origin, copy_region, 0, NULL, NULL);
|
copy_origin, copy_region, 0, NULL, NULL);
|
||||||
if (err)
|
if (err)
|
||||||
{
|
{
|
||||||
log_error("Copy %d (origin [%d, %d], size [%d, %d], image "
|
log_error(
|
||||||
"size [%d x %d]) Failed\n",
|
"Copy %d (origin [%zu, %zu], size [%zu, %zu], image "
|
||||||
|
"size [%zu x %zu]) Failed\n",
|
||||||
copy_number, x, y, delta_w, delta_h, img_width,
|
copy_number, x, y, delta_w, delta_h, img_width,
|
||||||
img_height);
|
img_height);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -115,9 +115,9 @@ static int test_imagecopy3d_impl(cl_device_id device, cl_context context,
|
|||||||
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
||||||
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
||||||
clMemWrapper streams[6];
|
clMemWrapper streams[6];
|
||||||
int img_width = 128;
|
size_t img_width = 128;
|
||||||
int img_height = 128;
|
size_t img_height = 128;
|
||||||
int img_depth = 64;
|
size_t img_depth = 64;
|
||||||
int i;
|
int i;
|
||||||
cl_int err;
|
cl_int err;
|
||||||
unsigned num_elements = img_width * img_height * img_depth * 4;
|
unsigned num_elements = img_width * img_height * img_depth * 4;
|
||||||
|
|||||||
@@ -82,8 +82,8 @@ REGISTER_TEST(imagenpot)
|
|||||||
cl_kernel kernel;
|
cl_kernel kernel;
|
||||||
size_t global_threads[3], local_threads[3];
|
size_t global_threads[3], local_threads[3];
|
||||||
size_t local_workgroup_size;
|
size_t local_workgroup_size;
|
||||||
int img_width;
|
size_t img_width;
|
||||||
int img_height;
|
size_t img_height;
|
||||||
int err;
|
int err;
|
||||||
cl_uint m;
|
cl_uint m;
|
||||||
size_t max_local_workgroup_size[3];
|
size_t max_local_workgroup_size[3];
|
||||||
|
|||||||
@@ -123,15 +123,15 @@ REGISTER_TEST(imagerandomcopy)
|
|||||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||||
float *rgbafp_inptr, *rgbafp_outptr;
|
float *rgbafp_inptr, *rgbafp_outptr;
|
||||||
clMemWrapper streams[6];
|
clMemWrapper streams[6];
|
||||||
int img_width = 512;
|
size_t img_width = 512;
|
||||||
int img_height = 512;
|
size_t img_height = 512;
|
||||||
int i, j;
|
int i, j;
|
||||||
cl_int err;
|
cl_int err;
|
||||||
MTdata d;
|
MTdata d;
|
||||||
|
|
||||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||||
|
|
||||||
log_info("Testing with image %d x %d.\n", img_width, img_height);
|
log_info("Testing with image %zu x %zu.\n", img_width, img_height);
|
||||||
|
|
||||||
d = init_genrand( gRandomSeed );
|
d = init_genrand( gRandomSeed );
|
||||||
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
|
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
|
||||||
@@ -191,8 +191,8 @@ REGISTER_TEST(imagerandomcopy)
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1};
|
size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1};
|
||||||
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
|
err = clEnqueueWriteImage(queue, streams[i * 2], CL_TRUE, origin,
|
||||||
// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL);
|
region, 0, 0, p, 0, NULL, NULL);
|
||||||
test_error(err, "clEnqueueWriteImage failed");
|
test_error(err, "clEnqueueWriteImage failed");
|
||||||
|
|
||||||
for (j=0; j<NUM_COPIES; j++)
|
for (j=0; j<NUM_COPIES; j++)
|
||||||
|
|||||||
@@ -195,8 +195,8 @@ REGISTER_TEST(imagereadwrite)
|
|||||||
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
||||||
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
||||||
clMemWrapper streams[3];
|
clMemWrapper streams[3];
|
||||||
int img_width = 512;
|
size_t img_width = 512;
|
||||||
int img_height = 512;
|
size_t img_height = 512;
|
||||||
int num_tries = 200;
|
int num_tries = 200;
|
||||||
int i, j, err;
|
int i, j, err;
|
||||||
MTdataHolder d(gRandomSeed);
|
MTdataHolder d(gRandomSeed);
|
||||||
@@ -242,10 +242,10 @@ REGISTER_TEST(imagereadwrite)
|
|||||||
|
|
||||||
for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++)
|
for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++)
|
||||||
{
|
{
|
||||||
int x = (int)get_random_float(0, img_width, d);
|
size_t x = (size_t)get_random_float(0, img_width, d);
|
||||||
int y = (int)get_random_float(0, img_height, d);
|
size_t y = (size_t)get_random_float(0, img_height, d);
|
||||||
int w = (int)get_random_float(1, (img_width - x), d);
|
size_t w = (size_t)get_random_float(1, (img_width - x), d);
|
||||||
int h = (int)get_random_float(1, (img_height - y), d);
|
size_t h = (size_t)get_random_float(1, (img_height - y), d);
|
||||||
size_t input_pitch;
|
size_t input_pitch;
|
||||||
int set_input_pitch = (int)(genrand_int32(d) & 0x01);
|
int set_input_pitch = (int)(genrand_int32(d) & 0x01);
|
||||||
int packed_update = (int)(genrand_int32(d) & 0x01);
|
int packed_update = (int)(genrand_int32(d) & 0x01);
|
||||||
@@ -386,7 +386,8 @@ REGISTER_TEST(imagereadwrite)
|
|||||||
img_width, img_height);
|
img_width, img_height);
|
||||||
if (err)
|
if (err)
|
||||||
{
|
{
|
||||||
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
|
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
|
||||||
|
y, w, h, (int)input_pitch, (int)i);
|
||||||
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
|
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -396,7 +397,8 @@ REGISTER_TEST(imagereadwrite)
|
|||||||
img_width, img_height);
|
img_width, img_height);
|
||||||
if (err)
|
if (err)
|
||||||
{
|
{
|
||||||
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
|
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
|
||||||
|
y, w, h, (int)input_pitch, (int)i);
|
||||||
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
|
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -406,7 +408,8 @@ REGISTER_TEST(imagereadwrite)
|
|||||||
img_width, img_height);
|
img_width, img_height);
|
||||||
if (err)
|
if (err)
|
||||||
{
|
{
|
||||||
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
|
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
|
||||||
|
y, w, h, (int)input_pitch, (int)i);
|
||||||
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
|
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -205,10 +205,10 @@ REGISTER_TEST(imagereadwrite3d)
|
|||||||
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
||||||
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
||||||
clMemWrapper streams[3];
|
clMemWrapper streams[3];
|
||||||
int img_width = 64;
|
size_t img_width = 64;
|
||||||
int img_height = 64;
|
size_t img_height = 64;
|
||||||
int img_depth = 32;
|
size_t img_depth = 32;
|
||||||
int img_slice = img_width * img_height;
|
size_t img_slice = img_width * img_height;
|
||||||
int num_tries = 30;
|
int num_tries = 30;
|
||||||
int i, j, err;
|
int i, j, err;
|
||||||
MTdataHolder mtData(gRandomSeed);
|
MTdataHolder mtData(gRandomSeed);
|
||||||
@@ -257,12 +257,12 @@ REGISTER_TEST(imagereadwrite3d)
|
|||||||
|
|
||||||
for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++)
|
for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++)
|
||||||
{
|
{
|
||||||
int x = (int)get_random_float(0, (float)img_width - 1, mtData);
|
size_t x = (size_t)get_random_float(0, (float)img_width - 1, mtData);
|
||||||
int y = (int)get_random_float(0, (float)img_height - 1, mtData);
|
size_t y = (size_t)get_random_float(0, (float)img_height - 1, mtData);
|
||||||
int z = (int)get_random_float(0, (float)img_depth - 1, mtData);
|
size_t z = (size_t)get_random_float(0, (float)img_depth - 1, mtData);
|
||||||
int w = (int)get_random_float(1, (float)(img_width - x), mtData);
|
size_t w = (size_t)get_random_float(1, (float)(img_width - x), mtData);
|
||||||
int h = (int)get_random_float(1, (float)(img_height - y), mtData);
|
size_t h = (size_t)get_random_float(1, (float)(img_height - y), mtData);
|
||||||
int d = (int)get_random_float(1, (float)(img_depth - z), mtData);
|
size_t d = (size_t)get_random_float(1, (float)(img_depth - z), mtData);
|
||||||
size_t input_pitch, input_slice_pitch;
|
size_t input_pitch, input_slice_pitch;
|
||||||
int set_input_pitch = (int)(genrand_int32(mtData) & 0x01);
|
int set_input_pitch = (int)(genrand_int32(mtData) & 0x01);
|
||||||
int packed_update = (int)(genrand_int32(mtData) & 0x01);
|
int packed_update = (int)(genrand_int32(mtData) & 0x01);
|
||||||
@@ -401,7 +401,10 @@ REGISTER_TEST(imagereadwrite3d)
|
|||||||
img_width, img_height, img_depth);
|
img_width, img_height, img_depth);
|
||||||
if (err)
|
if (err)
|
||||||
{
|
{
|
||||||
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
|
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
|
||||||
|
"slice_pitch=%d, try=%d\n",
|
||||||
|
x, y, z, w, h, d, (int)input_pitch,
|
||||||
|
(int)input_slice_pitch, (int)i);
|
||||||
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
|
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -411,7 +414,10 @@ REGISTER_TEST(imagereadwrite3d)
|
|||||||
img_width, img_height, img_depth);
|
img_width, img_height, img_depth);
|
||||||
if (err)
|
if (err)
|
||||||
{
|
{
|
||||||
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
|
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
|
||||||
|
"slice_pitch=%d, try=%d\n",
|
||||||
|
x, y, z, w, h, d, (int)input_pitch,
|
||||||
|
(int)input_slice_pitch, (int)i);
|
||||||
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
|
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -421,7 +427,10 @@ REGISTER_TEST(imagereadwrite3d)
|
|||||||
img_width, img_height, img_depth);
|
img_width, img_height, img_depth);
|
||||||
if (err)
|
if (err)
|
||||||
{
|
{
|
||||||
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
|
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
|
||||||
|
"slice_pitch=%d, try=%d\n",
|
||||||
|
x, y, z, w, h, d, (int)input_pitch,
|
||||||
|
(int)input_slice_pitch, (int)i);
|
||||||
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
|
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ REGISTER_TEST(kernel_call_kernel_function)
|
|||||||
clKernelWrapper kernel1, kernel2, kernel_to_call;
|
clKernelWrapper kernel1, kernel2, kernel_to_call;
|
||||||
clMemWrapper streams[2];
|
clMemWrapper streams[2];
|
||||||
|
|
||||||
size_t threads[] = {num_elements,1,1};
|
size_t threads[] = { static_cast<size_t>(num_elements), 1, 1 };
|
||||||
cl_int *input, *output, *expected;
|
cl_int *input, *output, *expected;
|
||||||
cl_int times = 4;
|
cl_int times = 4;
|
||||||
int pass = 0;
|
int pass = 0;
|
||||||
|
|||||||
@@ -117,8 +117,8 @@ REGISTER_TEST(mri_multiple)
|
|||||||
cl_program program;
|
cl_program program;
|
||||||
cl_kernel kernel;
|
cl_kernel kernel;
|
||||||
size_t threads[2];
|
size_t threads[2];
|
||||||
int img_width = 512;
|
size_t img_width = 512;
|
||||||
int img_height = 512;
|
size_t img_height = 512;
|
||||||
int i, err;
|
int i, err;
|
||||||
MTdata d;
|
MTdata d;
|
||||||
|
|
||||||
|
|||||||
@@ -100,8 +100,8 @@ REGISTER_TEST(mri_one)
|
|||||||
cl_program program;
|
cl_program program;
|
||||||
cl_kernel kernel;
|
cl_kernel kernel;
|
||||||
size_t threads[2];
|
size_t threads[2];
|
||||||
int img_width = 512;
|
size_t img_width = 512;
|
||||||
int img_height = 512;
|
size_t img_height = 512;
|
||||||
int i, err;
|
int i, err;
|
||||||
size_t origin[3] = {0, 0, 0};
|
size_t origin[3] = {0, 0, 0};
|
||||||
size_t region[3] = {img_width, img_height, 1};
|
size_t region[3] = {img_width, img_height, 1};
|
||||||
|
|||||||
@@ -1264,7 +1264,7 @@ static int l_write_read_for_type(cl_device_id device, cl_context context,
|
|||||||
}
|
}
|
||||||
|
|
||||||
cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
|
cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
|
||||||
queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
|
queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0,
|
||||||
0, 0);
|
0, 0);
|
||||||
memset(read_data, -1, read_data_size);
|
memset(read_data, -1, read_data_size);
|
||||||
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
|
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
|
||||||
@@ -1503,7 +1503,7 @@ static int l_init_write_read_for_type(cl_device_id device, cl_context context,
|
|||||||
clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
|
clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
|
||||||
|
|
||||||
cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
|
cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
|
||||||
queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
|
queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0,
|
||||||
0, 0);
|
0, 0);
|
||||||
memset(read_data, -1, read_data_size);
|
memset(read_data, -1, read_data_size);
|
||||||
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
|
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
|
||||||
|
|||||||
@@ -476,7 +476,7 @@ struct TestWorkItemFnsOutOfRange
|
|||||||
maxWorkItemSizes[2] };
|
maxWorkItemSizes[2] };
|
||||||
// check if maximum work group size for current dimention is not
|
// check if maximum work group size for current dimention is not
|
||||||
// exceeded
|
// exceeded
|
||||||
cl_uint work_group_size = max_workgroup_size + 1;
|
size_t work_group_size = max_workgroup_size + 1;
|
||||||
while (max_workgroup_size < work_group_size && work_group_size != 1)
|
while (max_workgroup_size < work_group_size && work_group_size != 1)
|
||||||
{
|
{
|
||||||
work_group_size = 1;
|
work_group_size = 1;
|
||||||
@@ -492,9 +492,9 @@ struct TestWorkItemFnsOutOfRange
|
|||||||
|
|
||||||
// compute max number of work groups based on buffer size and max
|
// compute max number of work groups based on buffer size and max
|
||||||
// group size
|
// group size
|
||||||
cl_uint max_work_groups = testData.size() / work_group_size;
|
size_t max_work_groups = testData.size() / work_group_size;
|
||||||
// take into account number of dimentions
|
// take into account number of dimentions
|
||||||
cl_uint work_groups_per_dim =
|
size_t work_groups_per_dim =
|
||||||
std::max(1, (int)pow(max_work_groups, 1.f / dim));
|
std::max(1, (int)pow(max_work_groups, 1.f / dim));
|
||||||
|
|
||||||
for (size_t j = 0; j < dim; j++)
|
for (size_t j = 0; j < dim; j++)
|
||||||
|
|||||||
@@ -24,8 +24,9 @@
|
|||||||
|
|
||||||
#include "CL/cl_half.h"
|
#include "CL/cl_half.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <iomanip>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
|
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
|
||||||
#define MAX_HOST_THREADS GetThreadCount()
|
#define MAX_HOST_THREADS GetThreadCount()
|
||||||
@@ -74,9 +75,11 @@ extern int
|
|||||||
gMaxDeviceThreads; // maximum number of threads executed on OCL device
|
gMaxDeviceThreads; // maximum number of threads executed on OCL device
|
||||||
extern cl_device_atomic_capabilities gAtomicMemCap,
|
extern cl_device_atomic_capabilities gAtomicMemCap,
|
||||||
gAtomicFenceCap; // atomic memory and fence capabilities for this device
|
gAtomicFenceCap; // atomic memory and fence capabilities for this device
|
||||||
|
|
||||||
extern cl_half_rounding_mode gHalfRoundingMode;
|
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||||
extern bool gFloatAtomicsSupported;
|
extern bool gFloatAtomicsSupported;
|
||||||
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
|
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
|
||||||
|
extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
|
||||||
|
|
||||||
extern const char *
|
extern const char *
|
||||||
get_memory_order_type_name(TExplicitMemoryOrderType orderType);
|
get_memory_order_type_name(TExplicitMemoryOrderType orderType);
|
||||||
@@ -174,6 +177,13 @@ public:
|
|||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
virtual bool
|
||||||
|
IsTestNotAsExpected(const HostDataType &expected,
|
||||||
|
const std::vector<HostAtomicType> &testValues,
|
||||||
|
cl_uint whichDestValue)
|
||||||
|
{
|
||||||
|
return expected != testValues[whichDestValue];
|
||||||
|
}
|
||||||
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||||
MTdata d)
|
MTdata d)
|
||||||
{
|
{
|
||||||
@@ -883,14 +893,15 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
|
|||||||
header += std::string("__global volatile ") + aTypeName + " destMemory["
|
header += std::string("__global volatile ") + aTypeName + " destMemory["
|
||||||
+ ss.str() + "] = {\n";
|
+ ss.str() + "] = {\n";
|
||||||
ss.str("");
|
ss.str("");
|
||||||
|
|
||||||
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
|
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
|
||||||
!= TYPE_ATOMIC_HALF)
|
== TYPE_ATOMIC_FLOAT)
|
||||||
ss << _startValue;
|
ss << std::setprecision(10) << _startValue;
|
||||||
else
|
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
|
||||||
|
== TYPE_ATOMIC_HALF)
|
||||||
ss << static_cast<HostDataType>(
|
ss << static_cast<HostDataType>(
|
||||||
cl_half_to_float(static_cast<cl_half>(_startValue)));
|
cl_half_to_float(static_cast<cl_half>(_startValue)));
|
||||||
|
else
|
||||||
|
ss << _startValue;
|
||||||
for (cl_uint i = 0; i < maxNumDestItems; i++)
|
for (cl_uint i = 0; i < maxNumDestItems; i++)
|
||||||
{
|
{
|
||||||
if (aTypeName == "atomic_flag")
|
if (aTypeName == "atomic_flag")
|
||||||
@@ -1449,7 +1460,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
|
|||||||
startRefValues.size() ? &startRefValues[0] : 0, i))
|
startRefValues.size() ? &startRefValues[0] : 0, i))
|
||||||
break; // no expected value function provided
|
break; // no expected value function provided
|
||||||
|
|
||||||
if (expected != destItems[i])
|
if (IsTestNotAsExpected(expected, destItems, i))
|
||||||
{
|
{
|
||||||
std::stringstream logLine;
|
std::stringstream logLine;
|
||||||
logLine << "ERROR: Result " << i
|
logLine << "ERROR: Result " << i
|
||||||
|
|||||||
@@ -17,6 +17,9 @@
|
|||||||
#define HOST_ATOMICS_H_
|
#define HOST_ATOMICS_H_
|
||||||
|
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
#include "CL/cl_half.h"
|
||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#include "Windows.h"
|
#include "Windows.h"
|
||||||
@@ -87,6 +90,8 @@ enum TExplicitMemoryOrderType
|
|||||||
|
|
||||||
#define HOST_FLAG cl_int
|
#define HOST_FLAG cl_int
|
||||||
|
|
||||||
|
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||||
|
|
||||||
// host atomic functions
|
// host atomic functions
|
||||||
void host_atomic_thread_fence(TExplicitMemoryOrderType order);
|
void host_atomic_thread_fence(TExplicitMemoryOrderType order);
|
||||||
|
|
||||||
@@ -94,6 +99,16 @@ template <typename AtomicType, typename CorrespondingType>
|
|||||||
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
|
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
|
||||||
TExplicitMemoryOrderType order)
|
TExplicitMemoryOrderType order)
|
||||||
{
|
{
|
||||||
|
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
static std::mutex mx;
|
||||||
|
std::lock_guard<std::mutex> lock(mx);
|
||||||
|
CorrespondingType old_value = *a;
|
||||||
|
*a += c;
|
||||||
|
return old_value;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
||||||
return InterlockedExchangeAdd(a, c);
|
return InterlockedExchangeAdd(a, c);
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
@@ -103,11 +118,23 @@ CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingTyp
|
|||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename AtomicType, typename CorrespondingType>
|
template <typename AtomicType, typename CorrespondingType>
|
||||||
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
|
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
|
||||||
TExplicitMemoryOrderType order)
|
TExplicitMemoryOrderType order)
|
||||||
{
|
{
|
||||||
|
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
|
||||||
|
{
|
||||||
|
static std::mutex mx;
|
||||||
|
std::lock_guard<std::mutex> lock(mx);
|
||||||
|
CorrespondingType old_value = *a;
|
||||||
|
*a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
|
||||||
|
gHalfRoundingMode);
|
||||||
|
return old_value;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
|
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
|
||||||
return InterlockedExchangeSubtract(a, c);
|
return InterlockedExchangeSubtract(a, c);
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
@@ -117,6 +144,7 @@ CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingTyp
|
|||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename AtomicType, typename CorrespondingType>
|
template <typename AtomicType, typename CorrespondingType>
|
||||||
CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType c,
|
CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType c,
|
||||||
@@ -145,7 +173,22 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
|
|||||||
TExplicitMemoryOrderType order_failure)
|
TExplicitMemoryOrderType order_failure)
|
||||||
{
|
{
|
||||||
CorrespondingType tmp;
|
CorrespondingType tmp;
|
||||||
|
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
static std::mutex mtx;
|
||||||
|
std::lock_guard<std::mutex> lock(mtx);
|
||||||
|
tmp = *reinterpret_cast<volatile float *>(a);
|
||||||
|
if (tmp == *expected)
|
||||||
|
{
|
||||||
|
*reinterpret_cast<volatile float *>(a) = desired;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
*expected = tmp;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
|
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
|
||||||
|
|
||||||
tmp = InterlockedCompareExchange(a, desired, *expected);
|
tmp = InterlockedCompareExchange(a, desired, *expected);
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
tmp = __sync_val_compare_and_swap(a, *expected, desired);
|
tmp = __sync_val_compare_and_swap(a, *expected, desired);
|
||||||
@@ -153,9 +196,9 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
|
|||||||
log_info("Host function not implemented: atomic_compare_exchange\n");
|
log_info("Host function not implemented: atomic_compare_exchange\n");
|
||||||
tmp = 0;
|
tmp = 0;
|
||||||
#endif
|
#endif
|
||||||
if(tmp == *expected)
|
if (tmp == *expected) return true;
|
||||||
return true;
|
|
||||||
*expected = tmp;
|
*expected = tmp;
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
|
|||||||
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
|
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
|
||||||
bool gFloatAtomicsSupported = false;
|
bool gFloatAtomicsSupported = false;
|
||||||
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
|
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
|
||||||
|
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
|
||||||
|
|
||||||
test_status InitCL(cl_device_id device) {
|
test_status InitCL(cl_device_id device) {
|
||||||
auto version = get_device_cl_version(device);
|
auto version = get_device_cl_version(device);
|
||||||
@@ -132,6 +133,12 @@ test_status InitCL(cl_device_id device) {
|
|||||||
if (is_extension_available(device, "cl_ext_float_atomics"))
|
if (is_extension_available(device, "cl_ext_float_atomics"))
|
||||||
{
|
{
|
||||||
gFloatAtomicsSupported = true;
|
gFloatAtomicsSupported = true;
|
||||||
|
|
||||||
|
cl_int error = clGetDeviceInfo(
|
||||||
|
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||||
|
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
|
||||||
|
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
|
||||||
|
|
||||||
if (is_extension_available(device, "cl_khr_fp16"))
|
if (is_extension_available(device, "cl_khr_fp16"))
|
||||||
{
|
{
|
||||||
cl_int error = clGetDeviceInfo(
|
cl_int error = clGetDeviceInfo(
|
||||||
|
|||||||
@@ -16,10 +16,13 @@
|
|||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/kernelHelpers.h"
|
#include "harness/kernelHelpers.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
|
#include "harness/conversions.h"
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "host_atomics.h"
|
#include "host_atomics.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <numeric>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -1163,39 +1166,134 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak)
|
|||||||
template <typename HostAtomicType, typename HostDataType>
|
template <typename HostAtomicType, typename HostDataType>
|
||||||
class CBasicTestFetchAdd
|
class CBasicTestFetchAdd
|
||||||
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
||||||
|
|
||||||
|
double min_range;
|
||||||
|
double max_range;
|
||||||
|
double max_error_fp32;
|
||||||
|
std::vector<HostDataType> ref_vals;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType,
|
using CBasicTestMemOrderScope<HostAtomicType,
|
||||||
HostDataType>::MemoryOrderScopeStr;
|
HostDataType>::MemoryOrderScopeStr;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
||||||
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
|
||||||
CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
|
CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
|
||||||
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
||||||
useSVM)
|
useSVM),
|
||||||
{}
|
min_range(-999.0), max_range(999.0), max_error_fp32(0.0)
|
||||||
virtual std::string ProgramCore()
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
StartValue(0.f);
|
||||||
|
CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::OldValueCheck(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||||
|
MTdata d) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
if (threadCount > ref_vals.size())
|
||||||
|
{
|
||||||
|
ref_vals.resize(threadCount);
|
||||||
|
|
||||||
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
|
ref_vals[i] = get_random_float(min_range, max_range, d);
|
||||||
|
|
||||||
|
memcpy(startRefValues, ref_vals.data(),
|
||||||
|
sizeof(HostDataType) * ref_vals.size());
|
||||||
|
|
||||||
|
// Estimate highest possible summation error for given set.
|
||||||
|
std::vector<HostDataType> sums;
|
||||||
|
std::sort(ref_vals.begin(), ref_vals.end());
|
||||||
|
|
||||||
|
sums.push_back(
|
||||||
|
std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
|
||||||
|
|
||||||
|
sums.push_back(
|
||||||
|
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
|
||||||
|
|
||||||
|
std::sort(
|
||||||
|
ref_vals.begin(), ref_vals.end(),
|
||||||
|
[](float a, float b) { return std::abs(a) < std::abs(b); });
|
||||||
|
|
||||||
|
double precise = 0.0;
|
||||||
|
for (auto elem : ref_vals) precise += double(elem);
|
||||||
|
sums.push_back(precise);
|
||||||
|
|
||||||
|
sums.push_back(
|
||||||
|
std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
|
||||||
|
|
||||||
|
sums.push_back(
|
||||||
|
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
|
||||||
|
|
||||||
|
std::sort(sums.begin(), sums.end());
|
||||||
|
max_error_fp32 =
|
||||||
|
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
|
||||||
|
|
||||||
|
// restore unsorted order
|
||||||
|
memcpy(ref_vals.data(), startRefValues,
|
||||||
|
sizeof(HostDataType) * ref_vals.size());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
memcpy(startRefValues, ref_vals.data(),
|
||||||
|
sizeof(HostDataType) * threadCount);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
std::string ProgramCore() override
|
||||||
{
|
{
|
||||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||||
|
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
|
||||||
|
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
|
||||||
|
+ memoryOrderScope + ");\n"
|
||||||
|
+ " oldValues[tid] = atomic_fetch_add" + postfix
|
||||||
|
+ "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
|
||||||
|
+ ")0" + memoryOrderScope + ");\n";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
return " oldValues[tid] = atomic_fetch_add" + postfix
|
return " oldValues[tid] = atomic_fetch_add" + postfix
|
||||||
+ "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
|
+ "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
|
||||||
+ ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add"
|
+ ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add"
|
||||||
+ postfix + "(&destMemory[0], ("
|
+ postfix + "(&destMemory[0], ("
|
||||||
+ DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
|
+ DataType().AddSubOperandTypeName() + ")tid + 3"
|
||||||
|
+ memoryOrderScope
|
||||||
+ ");\n"
|
+ ");\n"
|
||||||
" atomic_fetch_add"
|
" atomic_fetch_add"
|
||||||
+ postfix + "(&destMemory[0], ("
|
+ postfix + "(&destMemory[0], ("
|
||||||
+ DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
|
+ DataType().AddSubOperandTypeName() + ")tid + 3"
|
||||||
|
+ memoryOrderScope
|
||||||
+ ");\n"
|
+ ");\n"
|
||||||
" atomic_fetch_add"
|
" atomic_fetch_add"
|
||||||
+ postfix + "(&destMemory[0], (("
|
+ postfix + "(&destMemory[0], (("
|
||||||
+ DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
|
+ DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
|
||||||
+ DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope
|
+ DataType().AddSubOperandTypeName() + ")-1)*8"
|
||||||
+ ");\n";
|
+ memoryOrderScope + ");\n";
|
||||||
}
|
}
|
||||||
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
|
}
|
||||||
|
void HostFunction(cl_uint tid, cl_uint threadCount,
|
||||||
volatile HostAtomicType *destMemory,
|
volatile HostAtomicType *destMemory,
|
||||||
HostDataType *oldValues)
|
HostDataType *oldValues) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
|
||||||
|
MemoryOrder());
|
||||||
|
oldValues[tid] = host_atomic_fetch_add(
|
||||||
|
&destMemory[tid], (HostDataType)0, MemoryOrder());
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
oldValues[tid] = host_atomic_fetch_add(
|
oldValues[tid] = host_atomic_fetch_add(
|
||||||
&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
|
&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
|
||||||
@@ -1203,21 +1301,98 @@ public:
|
|||||||
MemoryOrder());
|
MemoryOrder());
|
||||||
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
|
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
|
||||||
MemoryOrder());
|
MemoryOrder());
|
||||||
host_atomic_fetch_add(&destMemory[0],
|
host_atomic_fetch_add(
|
||||||
((HostDataType)tid + 3)
|
&destMemory[0],
|
||||||
<< (sizeof(HostDataType) - 1) * 8,
|
(((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
|
||||||
MemoryOrder());
|
MemoryOrder());
|
||||||
}
|
}
|
||||||
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
}
|
||||||
|
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
||||||
HostDataType *startRefValues,
|
HostDataType *startRefValues,
|
||||||
cl_uint whichDestValue)
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
expected = StartValue();
|
expected = StartValue();
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
if (whichDestValue == 0)
|
||||||
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
|
expected += startRefValues[i];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
expected += ((HostDataType)i + 3) * 3
|
expected += ((HostDataType)i + 3) * 3
|
||||||
+ (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
|
+ (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
bool IsTestNotAsExpected(const HostDataType &expected,
|
||||||
|
const std::vector<HostAtomicType> &testValues,
|
||||||
|
cl_uint whichDestValue) override
|
||||||
|
{
|
||||||
|
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||||
|
{
|
||||||
|
if (whichDestValue == 0)
|
||||||
|
return std::abs((HOST_ATOMIC_FLOAT)expected
|
||||||
|
- testValues[whichDestValue])
|
||||||
|
> max_error_fp32;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<
|
||||||
|
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
|
||||||
|
testValues,
|
||||||
|
whichDestValue);
|
||||||
|
}
|
||||||
|
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||||
|
HostAtomicType *finalValues) override
|
||||||
|
{
|
||||||
|
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||||
|
{
|
||||||
|
correct = true;
|
||||||
|
for (cl_uint i = 1; i < threadCount; i++)
|
||||||
|
{
|
||||||
|
if (refValues[i] != StartValue())
|
||||||
|
{
|
||||||
|
log_error("Thread %d found %d mismatch(es)\n", i,
|
||||||
|
(cl_uint)refValues[i]);
|
||||||
|
correct = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return !correct;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::VerifyRefs(correct,
|
||||||
|
threadCount,
|
||||||
|
refValues,
|
||||||
|
finalValues);
|
||||||
|
}
|
||||||
|
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||||
|
cl_command_queue queue) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
if (LocalMemory()
|
||||||
|
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||||
|
return 0; // skip test - not applicable
|
||||||
|
|
||||||
|
if (!LocalMemory()
|
||||||
|
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<
|
||||||
|
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
|
||||||
|
queue);
|
||||||
|
}
|
||||||
|
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
return threadCount;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::NumResults(threadCount,
|
||||||
|
deviceID);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static int test_atomic_fetch_add_generic(cl_device_id deviceID,
|
static int test_atomic_fetch_add_generic(cl_device_id deviceID,
|
||||||
@@ -1242,6 +1417,15 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
|
|||||||
TYPE_ATOMIC_ULONG, useSVM);
|
TYPE_ATOMIC_ULONG, useSVM);
|
||||||
EXECUTE_TEST(error,
|
EXECUTE_TEST(error,
|
||||||
test_ulong.Execute(deviceID, context, queue, num_elements));
|
test_ulong.Execute(deviceID, context, queue, num_elements));
|
||||||
|
|
||||||
|
if (gFloatAtomicsSupported)
|
||||||
|
{
|
||||||
|
CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
|
||||||
|
TYPE_ATOMIC_FLOAT, useSVM);
|
||||||
|
EXECUTE_TEST(
|
||||||
|
error, test_float.Execute(deviceID, context, queue, num_elements));
|
||||||
|
}
|
||||||
|
|
||||||
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
|
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
|
||||||
{
|
{
|
||||||
CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
|
CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
|
||||||
@@ -1304,46 +1488,239 @@ REGISTER_TEST(svm_atomic_fetch_add)
|
|||||||
template <typename HostAtomicType, typename HostDataType>
|
template <typename HostAtomicType, typename HostDataType>
|
||||||
class CBasicTestFetchSub
|
class CBasicTestFetchSub
|
||||||
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
||||||
|
|
||||||
|
double min_range;
|
||||||
|
double max_range;
|
||||||
|
double max_error;
|
||||||
|
std::vector<HostDataType> ref_vals;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType,
|
using CBasicTestMemOrderScope<HostAtomicType,
|
||||||
HostDataType>::MemoryOrderScopeStr;
|
HostDataType>::MemoryOrderScopeStr;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
||||||
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
|
||||||
CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM)
|
CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM)
|
||||||
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
||||||
useSVM)
|
useSVM),
|
||||||
{}
|
min_range(-999.0), max_range(999.0), max_error(0.0)
|
||||||
virtual std::string ProgramCore()
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||||
|
{
|
||||||
|
StartValue(0);
|
||||||
|
CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::OldValueCheck(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template <typename Iterator>
|
||||||
|
float subtract_halfs(Iterator begin, Iterator end)
|
||||||
|
{
|
||||||
|
cl_half res = 0;
|
||||||
|
for (auto it = begin; it != end; ++it)
|
||||||
|
{
|
||||||
|
res = cl_half_from_float(cl_half_to_float(res)
|
||||||
|
- cl_half_to_float(*it),
|
||||||
|
gHalfRoundingMode);
|
||||||
|
}
|
||||||
|
return cl_half_to_float(res);
|
||||||
|
}
|
||||||
|
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||||
|
MTdata d) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||||
|
{
|
||||||
|
if (threadCount > ref_vals.size())
|
||||||
|
{
|
||||||
|
ref_vals.resize(threadCount);
|
||||||
|
|
||||||
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
|
ref_vals[i] = cl_half_from_float(
|
||||||
|
get_random_float(min_range, max_range, d),
|
||||||
|
gHalfRoundingMode);
|
||||||
|
|
||||||
|
memcpy(startRefValues, ref_vals.data(),
|
||||||
|
sizeof(HostDataType) * ref_vals.size());
|
||||||
|
|
||||||
|
// Estimate highest possible summation error for given set.
|
||||||
|
std::vector<float> sums;
|
||||||
|
std::sort(ref_vals.begin(), ref_vals.end(),
|
||||||
|
[](cl_half a, cl_half b) {
|
||||||
|
return cl_half_to_float(a) < cl_half_to_float(b);
|
||||||
|
});
|
||||||
|
|
||||||
|
sums.push_back(
|
||||||
|
subtract_halfs(ref_vals.begin(), ref_vals.end()));
|
||||||
|
sums.push_back(
|
||||||
|
subtract_halfs(ref_vals.rbegin(), ref_vals.rend()));
|
||||||
|
|
||||||
|
std::sort(ref_vals.begin(), ref_vals.end(),
|
||||||
|
[](cl_half a, cl_half b) {
|
||||||
|
return std::abs(cl_half_to_float(a))
|
||||||
|
< std::abs(cl_half_to_float(b));
|
||||||
|
});
|
||||||
|
|
||||||
|
float precise = 0.f;
|
||||||
|
for (auto elem : ref_vals) precise -= cl_half_to_float(elem);
|
||||||
|
sums.push_back(precise);
|
||||||
|
|
||||||
|
sums.push_back(
|
||||||
|
subtract_halfs(ref_vals.begin(), ref_vals.end()));
|
||||||
|
sums.push_back(
|
||||||
|
subtract_halfs(ref_vals.rbegin(), ref_vals.rend()));
|
||||||
|
|
||||||
|
std::sort(sums.begin(), sums.end());
|
||||||
|
max_error = std::abs(sums.front() - sums.back());
|
||||||
|
|
||||||
|
// restore unsorted order
|
||||||
|
memcpy(ref_vals.data(), startRefValues,
|
||||||
|
sizeof(HostDataType) * ref_vals.size());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
memcpy(startRefValues, ref_vals.data(),
|
||||||
|
sizeof(HostDataType) * threadCount);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
std::string ProgramCore() override
|
||||||
{
|
{
|
||||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||||
|
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||||
|
{
|
||||||
|
return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
|
||||||
|
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
|
||||||
|
+ memoryOrderScope + ");\n"
|
||||||
|
+ " oldValues[tid] = atomic_fetch_sub" + postfix
|
||||||
|
+ "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
|
||||||
|
+ ")0" + memoryOrderScope + ");\n";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
return " oldValues[tid] = atomic_fetch_sub" + postfix
|
return " oldValues[tid] = atomic_fetch_sub" + postfix
|
||||||
+ "(&destMemory[0], tid + 3 +((("
|
+ "(&destMemory[0], tid + 3 +((("
|
||||||
+ DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
|
+ DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
|
||||||
+ DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope
|
+ DataType().AddSubOperandTypeName() + ")-1)*8)"
|
||||||
+ ");\n";
|
+ memoryOrderScope + ");\n";
|
||||||
}
|
}
|
||||||
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
|
}
|
||||||
|
void HostFunction(cl_uint tid, cl_uint threadCount,
|
||||||
volatile HostAtomicType *destMemory,
|
volatile HostAtomicType *destMemory,
|
||||||
HostDataType *oldValues)
|
HostDataType *oldValues) override
|
||||||
{
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||||
|
{
|
||||||
|
host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
|
||||||
|
MemoryOrder());
|
||||||
oldValues[tid] = host_atomic_fetch_sub(
|
oldValues[tid] = host_atomic_fetch_sub(
|
||||||
&destMemory[0],
|
&destMemory[tid], (HostDataType)0, MemoryOrder());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
oldValues[tid] =
|
||||||
|
host_atomic_fetch_sub(&destMemory[0],
|
||||||
(HostDataType)tid + 3
|
(HostDataType)tid + 3
|
||||||
+ (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
|
+ (((HostDataType)tid + 3)
|
||||||
|
<< (sizeof(HostDataType) - 1) * 8),
|
||||||
MemoryOrder());
|
MemoryOrder());
|
||||||
}
|
}
|
||||||
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
}
|
||||||
|
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
||||||
HostDataType *startRefValues,
|
HostDataType *startRefValues,
|
||||||
cl_uint whichDestValue)
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
expected = StartValue();
|
expected = StartValue();
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||||
|
{
|
||||||
|
if (whichDestValue == 0)
|
||||||
|
{
|
||||||
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
|
{
|
||||||
|
expected = cl_half_from_float(
|
||||||
|
cl_half_to_float(expected)
|
||||||
|
- cl_half_to_float(startRefValues[i]),
|
||||||
|
gHalfRoundingMode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
expected -= (HostDataType)i + 3
|
expected -= (HostDataType)i + 3
|
||||||
+ (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
|
+ (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
bool IsTestNotAsExpected(const HostDataType &expected,
|
||||||
|
const std::vector<HostAtomicType> &testValues,
|
||||||
|
cl_uint whichDestValue) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||||
|
{
|
||||||
|
if (whichDestValue == 0)
|
||||||
|
return std::abs(cl_half_to_float(expected)
|
||||||
|
- cl_half_to_float(testValues[whichDestValue]))
|
||||||
|
> max_error;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<
|
||||||
|
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
|
||||||
|
testValues,
|
||||||
|
whichDestValue);
|
||||||
|
}
|
||||||
|
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||||
|
HostAtomicType *finalValues) override
|
||||||
|
{
|
||||||
|
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||||
|
{
|
||||||
|
correct = true;
|
||||||
|
for (cl_uint i = 1; i < threadCount; i++)
|
||||||
|
{
|
||||||
|
if (refValues[i] != StartValue())
|
||||||
|
{
|
||||||
|
log_error("Thread %d found %d mismatch(es)\n", i,
|
||||||
|
(cl_uint)refValues[i]);
|
||||||
|
correct = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return !correct;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::VerifyRefs(correct,
|
||||||
|
threadCount,
|
||||||
|
refValues,
|
||||||
|
finalValues);
|
||||||
|
}
|
||||||
|
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||||
|
cl_command_queue queue) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||||
|
{
|
||||||
|
if (LocalMemory()
|
||||||
|
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||||
|
return 0; // skip test - not applicable
|
||||||
|
|
||||||
|
if (!LocalMemory()
|
||||||
|
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<
|
||||||
|
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
|
||||||
|
queue);
|
||||||
|
}
|
||||||
|
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
||||||
|
{
|
||||||
|
return threadCount;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::NumResults(threadCount,
|
||||||
|
deviceID);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
|
static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
|
||||||
@@ -1368,6 +1745,15 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
|
|||||||
TYPE_ATOMIC_ULONG, useSVM);
|
TYPE_ATOMIC_ULONG, useSVM);
|
||||||
EXECUTE_TEST(error,
|
EXECUTE_TEST(error,
|
||||||
test_ulong.Execute(deviceID, context, queue, num_elements));
|
test_ulong.Execute(deviceID, context, queue, num_elements));
|
||||||
|
|
||||||
|
if (gFloatAtomicsSupported)
|
||||||
|
{
|
||||||
|
CBasicTestFetchSub<HOST_ATOMIC_HALF, HOST_HALF> test_half(
|
||||||
|
TYPE_ATOMIC_HALF, useSVM);
|
||||||
|
EXECUTE_TEST(error,
|
||||||
|
test_half.Execute(deviceID, context, queue, num_elements));
|
||||||
|
}
|
||||||
|
|
||||||
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
|
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
|
||||||
{
|
{
|
||||||
CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
|
CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
|
||||||
@@ -2222,54 +2608,178 @@ REGISTER_TEST(svm_atomic_fetch_xor2)
|
|||||||
template <typename HostAtomicType, typename HostDataType>
|
template <typename HostAtomicType, typename HostDataType>
|
||||||
class CBasicTestFetchMin
|
class CBasicTestFetchMin
|
||||||
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
||||||
|
double min_range;
|
||||||
|
double max_range;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType,
|
using CBasicTestMemOrderScope<HostAtomicType,
|
||||||
HostDataType>::MemoryOrderScopeStr;
|
HostDataType>::MemoryOrderScopeStr;
|
||||||
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
|
||||||
CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM)
|
CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM)
|
||||||
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
||||||
useSVM)
|
useSVM),
|
||||||
|
min_range(-999.0), max_range(999.0)
|
||||||
{
|
{
|
||||||
StartValue(DataType().MaxValue());
|
StartValue(DataType().MaxValue());
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::OldValueCheck(false);
|
||||||
}
|
}
|
||||||
virtual std::string ProgramCore()
|
}
|
||||||
|
std::string ProgramCore() override
|
||||||
{
|
{
|
||||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||||
return " oldValues[tid] = atomic_fetch_min" + postfix
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n";
|
|
||||||
}
|
|
||||||
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
|
|
||||||
volatile HostAtomicType *destMemory,
|
|
||||||
HostDataType *oldValues)
|
|
||||||
{
|
{
|
||||||
oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid],
|
return " atomic_fetch_min" + postfix
|
||||||
MemoryOrder());
|
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
|
||||||
|
+ " oldValues[tid] = atomic_fetch_min" + postfix
|
||||||
|
+ "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
|
||||||
|
+ ")0" + memoryOrderScope + ");\n";
|
||||||
}
|
}
|
||||||
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
else
|
||||||
MTdata d)
|
{
|
||||||
|
return " oldValues[tid] = atomic_fetch_min" + postfix
|
||||||
|
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope
|
||||||
|
+ ");\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void HostFunction(cl_uint tid, cl_uint threadCount,
|
||||||
|
volatile HostAtomicType *destMemory,
|
||||||
|
HostDataType *oldValues) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
host_atomic_fetch_min(&destMemory[0], oldValues[tid],
|
||||||
|
MemoryOrder());
|
||||||
|
oldValues[tid] = host_atomic_fetch_min(
|
||||||
|
&destMemory[tid], (HostDataType)0, MemoryOrder());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
oldValues[tid] = host_atomic_fetch_min(
|
||||||
|
&destMemory[0], oldValues[tid], MemoryOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||||
|
MTdata d) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
|
{
|
||||||
|
startRefValues[i] = get_random_float(min_range, max_range, d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
{
|
{
|
||||||
startRefValues[i] = genrand_int32(d);
|
startRefValues[i] = genrand_int32(d);
|
||||||
if (sizeof(HostDataType) >= 8)
|
if (sizeof(HostDataType) >= 8)
|
||||||
startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
|
{
|
||||||
|
cl_ulong v = startRefValues[i];
|
||||||
|
v |= (cl_ulong)genrand_int32(d) << 16;
|
||||||
|
startRefValues[i] = v;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
||||||
HostDataType *startRefValues,
|
HostDataType *startRefValues,
|
||||||
cl_uint whichDestValue)
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
expected = StartValue();
|
expected = StartValue();
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
if (whichDestValue == 0)
|
||||||
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
|
if (startRefValues[i] < expected)
|
||||||
|
expected = startRefValues[i];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
{
|
{
|
||||||
if (startRefValues[i] < expected) expected = startRefValues[i];
|
if (startRefValues[i] < expected) expected = startRefValues[i];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
bool IsTestNotAsExpected(const HostDataType &expected,
|
||||||
|
const std::vector<HostAtomicType> &testValues,
|
||||||
|
cl_uint whichDestValue) override
|
||||||
|
{
|
||||||
|
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||||
|
{
|
||||||
|
if (whichDestValue == 0)
|
||||||
|
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
|
||||||
|
IsTestNotAsExpected(expected, testValues, whichDestValue);
|
||||||
|
return false; // ignore all but 0 which stores final result
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<
|
||||||
|
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
|
||||||
|
testValues,
|
||||||
|
whichDestValue);
|
||||||
|
}
|
||||||
|
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||||
|
HostAtomicType *finalValues) override
|
||||||
|
{
|
||||||
|
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||||
|
{
|
||||||
|
correct = true;
|
||||||
|
for (cl_uint i = 1; i < threadCount; i++)
|
||||||
|
{
|
||||||
|
for (cl_uint i = 1; i < threadCount; i++)
|
||||||
|
{
|
||||||
|
log_error("Thread %d found %d mismatch(es)\n", i,
|
||||||
|
(cl_uint)refValues[i]);
|
||||||
|
correct = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return !correct;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::VerifyRefs(correct,
|
||||||
|
threadCount,
|
||||||
|
refValues,
|
||||||
|
finalValues);
|
||||||
|
}
|
||||||
|
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||||
|
cl_command_queue queue) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
if (LocalMemory()
|
||||||
|
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||||
|
== 0)
|
||||||
|
return 0; // skip test - not applicable
|
||||||
|
|
||||||
|
if (!LocalMemory()
|
||||||
|
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||||
|
== 0)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<
|
||||||
|
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
|
||||||
|
queue);
|
||||||
|
}
|
||||||
|
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
return threadCount;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::NumResults(threadCount,
|
||||||
|
deviceID);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static int test_atomic_fetch_min_generic(cl_device_id deviceID,
|
static int test_atomic_fetch_min_generic(cl_device_id deviceID,
|
||||||
@@ -2294,6 +2804,15 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID,
|
|||||||
TYPE_ATOMIC_ULONG, useSVM);
|
TYPE_ATOMIC_ULONG, useSVM);
|
||||||
EXECUTE_TEST(error,
|
EXECUTE_TEST(error,
|
||||||
test_ulong.Execute(deviceID, context, queue, num_elements));
|
test_ulong.Execute(deviceID, context, queue, num_elements));
|
||||||
|
|
||||||
|
if (gFloatAtomicsSupported)
|
||||||
|
{
|
||||||
|
CBasicTestFetchMin<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
|
||||||
|
TYPE_ATOMIC_FLOAT, useSVM);
|
||||||
|
EXECUTE_TEST(
|
||||||
|
error, test_float.Execute(deviceID, context, queue, num_elements));
|
||||||
|
}
|
||||||
|
|
||||||
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
|
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
|
||||||
{
|
{
|
||||||
CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
|
CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
|
||||||
@@ -2356,54 +2875,178 @@ REGISTER_TEST(svm_atomic_fetch_min)
|
|||||||
template <typename HostAtomicType, typename HostDataType>
|
template <typename HostAtomicType, typename HostDataType>
|
||||||
class CBasicTestFetchMax
|
class CBasicTestFetchMax
|
||||||
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
||||||
|
double min_range;
|
||||||
|
double max_range;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
||||||
using CBasicTestMemOrderScope<HostAtomicType,
|
using CBasicTestMemOrderScope<HostAtomicType,
|
||||||
HostDataType>::MemoryOrderScopeStr;
|
HostDataType>::MemoryOrderScopeStr;
|
||||||
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
|
||||||
CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM)
|
CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM)
|
||||||
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
||||||
useSVM)
|
useSVM),
|
||||||
|
min_range(-999.0), max_range(999.0)
|
||||||
{
|
{
|
||||||
StartValue(DataType().MinValue());
|
StartValue(DataType().MinValue());
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::OldValueCheck(false);
|
||||||
}
|
}
|
||||||
virtual std::string ProgramCore()
|
}
|
||||||
|
std::string ProgramCore() override
|
||||||
{
|
{
|
||||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||||
return " oldValues[tid] = atomic_fetch_max" + postfix
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n";
|
|
||||||
}
|
|
||||||
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
|
|
||||||
volatile HostAtomicType *destMemory,
|
|
||||||
HostDataType *oldValues)
|
|
||||||
{
|
{
|
||||||
oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid],
|
return " atomic_fetch_max" + postfix
|
||||||
MemoryOrder());
|
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
|
||||||
|
+ " oldValues[tid] = atomic_fetch_max" + postfix
|
||||||
|
+ "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
|
||||||
|
+ ")0" + memoryOrderScope + ");\n";
|
||||||
}
|
}
|
||||||
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
else
|
||||||
MTdata d)
|
{
|
||||||
|
return " oldValues[tid] = atomic_fetch_max" + postfix
|
||||||
|
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope
|
||||||
|
+ ");\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void HostFunction(cl_uint tid, cl_uint threadCount,
|
||||||
|
volatile HostAtomicType *destMemory,
|
||||||
|
HostDataType *oldValues) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
host_atomic_fetch_max(&destMemory[0], oldValues[tid],
|
||||||
|
MemoryOrder());
|
||||||
|
oldValues[tid] = host_atomic_fetch_max(
|
||||||
|
&destMemory[tid], (HostDataType)0, MemoryOrder());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
oldValues[tid] = host_atomic_fetch_max(
|
||||||
|
&destMemory[0], oldValues[tid], MemoryOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||||
|
MTdata d) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
|
{
|
||||||
|
startRefValues[i] = get_random_float(min_range, max_range, d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
{
|
{
|
||||||
startRefValues[i] = genrand_int32(d);
|
startRefValues[i] = genrand_int32(d);
|
||||||
if (sizeof(HostDataType) >= 8)
|
if (sizeof(HostDataType) >= 8)
|
||||||
startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
|
{
|
||||||
|
cl_ulong v = startRefValues[i];
|
||||||
|
v |= (cl_ulong)genrand_int32(d) << 16;
|
||||||
|
startRefValues[i] = v;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
||||||
HostDataType *startRefValues,
|
HostDataType *startRefValues,
|
||||||
cl_uint whichDestValue)
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
expected = StartValue();
|
expected = StartValue();
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
if (whichDestValue == 0)
|
||||||
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
|
if (startRefValues[i] > expected)
|
||||||
|
expected = startRefValues[i];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
{
|
{
|
||||||
if (startRefValues[i] > expected) expected = startRefValues[i];
|
if (startRefValues[i] > expected) expected = startRefValues[i];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
bool IsTestNotAsExpected(const HostDataType &expected,
|
||||||
|
const std::vector<HostAtomicType> &testValues,
|
||||||
|
cl_uint whichDestValue) override
|
||||||
|
{
|
||||||
|
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||||
|
{
|
||||||
|
if (whichDestValue == 0)
|
||||||
|
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
|
||||||
|
IsTestNotAsExpected(expected, testValues, whichDestValue);
|
||||||
|
return false; // ignore all but 0 which stores final result
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<
|
||||||
|
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
|
||||||
|
testValues,
|
||||||
|
whichDestValue);
|
||||||
|
}
|
||||||
|
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||||
|
HostAtomicType *finalValues) override
|
||||||
|
{
|
||||||
|
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||||
|
{
|
||||||
|
correct = true;
|
||||||
|
for (cl_uint i = 1; i < threadCount; i++)
|
||||||
|
{
|
||||||
|
if (refValues[i] != StartValue())
|
||||||
|
{
|
||||||
|
log_error("Thread %d found %d mismatch(es)\n", i,
|
||||||
|
(cl_uint)refValues[i]);
|
||||||
|
correct = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return !correct;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::VerifyRefs(correct,
|
||||||
|
threadCount,
|
||||||
|
refValues,
|
||||||
|
finalValues);
|
||||||
|
}
|
||||||
|
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||||
|
cl_command_queue queue) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
if (LocalMemory()
|
||||||
|
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||||
|
== 0)
|
||||||
|
return 0; // skip test - not applicable
|
||||||
|
|
||||||
|
if (!LocalMemory()
|
||||||
|
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
|
||||||
|
== 0)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<
|
||||||
|
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
|
||||||
|
queue);
|
||||||
|
}
|
||||||
|
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||||
|
{
|
||||||
|
return threadCount;
|
||||||
|
}
|
||||||
|
return CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::NumResults(threadCount,
|
||||||
|
deviceID);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static int test_atomic_fetch_max_generic(cl_device_id deviceID,
|
static int test_atomic_fetch_max_generic(cl_device_id deviceID,
|
||||||
@@ -2428,6 +3071,15 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID,
|
|||||||
TYPE_ATOMIC_ULONG, useSVM);
|
TYPE_ATOMIC_ULONG, useSVM);
|
||||||
EXECUTE_TEST(error,
|
EXECUTE_TEST(error,
|
||||||
test_ulong.Execute(deviceID, context, queue, num_elements));
|
test_ulong.Execute(deviceID, context, queue, num_elements));
|
||||||
|
|
||||||
|
if (gFloatAtomicsSupported)
|
||||||
|
{
|
||||||
|
CBasicTestFetchMax<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
|
||||||
|
TYPE_ATOMIC_FLOAT, useSVM);
|
||||||
|
EXECUTE_TEST(
|
||||||
|
error, test_float.Execute(deviceID, context, queue, num_elements));
|
||||||
|
}
|
||||||
|
|
||||||
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
|
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
|
||||||
{
|
{
|
||||||
CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
|
CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ set(VULKAN_WRAPPER_SOURCES
|
|||||||
# needed by Vulkan wrapper to compile
|
# needed by Vulkan wrapper to compile
|
||||||
set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
|
set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
|
||||||
add_cxx_flag_if_supported(-Wmisleading-indentation)
|
add_cxx_flag_if_supported(-Wmisleading-indentation)
|
||||||
add_cxx_flag_if_supported(-Wno-narrowing)
|
|
||||||
add_cxx_flag_if_supported(-Wno-format)
|
add_cxx_flag_if_supported(-Wno-format)
|
||||||
add_cxx_flag_if_supported(-Wno-error)
|
add_cxx_flag_if_supported(-Wno-error)
|
||||||
add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive
|
add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive
|
||||||
|
|||||||
@@ -863,7 +863,7 @@ clExternalMemoryImage::clExternalMemoryImage(
|
|||||||
size_t clImageFormatSize;
|
size_t clImageFormatSize;
|
||||||
cl_image_desc image_desc;
|
cl_image_desc image_desc;
|
||||||
memset(&image_desc, 0x0, sizeof(cl_image_desc));
|
memset(&image_desc, 0x0, sizeof(cl_image_desc));
|
||||||
cl_image_format img_format = { 0 };
|
img_format = { 0 };
|
||||||
const VkImageCreateInfo VulkanImageCreateInfo =
|
const VkImageCreateInfo VulkanImageCreateInfo =
|
||||||
image2D.getVkImageCreateInfo();
|
image2D.getVkImageCreateInfo();
|
||||||
|
|
||||||
@@ -1233,7 +1233,7 @@ int clExternalExportableSemaphore::signal(cl_command_queue cmd_queue)
|
|||||||
import.fd = fd;
|
import.fd = fd;
|
||||||
import.pNext = nullptr;
|
import.pNext = nullptr;
|
||||||
import.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
|
import.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
|
||||||
import.flags = 0;
|
import.flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT;
|
||||||
|
|
||||||
VkResult res =
|
VkResult res =
|
||||||
vkImportSemaphoreFdKHR(m_deviceSemaphore.getDevice(), &import);
|
vkImportSemaphoreFdKHR(m_deviceSemaphore.getDevice(), &import);
|
||||||
|
|||||||
@@ -106,6 +106,7 @@ protected:
|
|||||||
cl_mem m_externalMemory;
|
cl_mem m_externalMemory;
|
||||||
int fd;
|
int fd;
|
||||||
void *handle;
|
void *handle;
|
||||||
|
cl_image_format img_format;
|
||||||
clExternalMemoryImage();
|
clExternalMemoryImage();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -117,6 +118,7 @@ public:
|
|||||||
cl_device_id deviceId);
|
cl_device_id deviceId);
|
||||||
virtual ~clExternalMemoryImage();
|
virtual ~clExternalMemoryImage();
|
||||||
cl_mem getExternalMemoryImage();
|
cl_mem getExternalMemoryImage();
|
||||||
|
cl_image_format getImageFormat() { return img_format; };
|
||||||
};
|
};
|
||||||
|
|
||||||
class clExternalSemaphore {
|
class clExternalSemaphore {
|
||||||
|
|||||||
@@ -243,6 +243,8 @@ getSupportedVulkanExternalMemoryHandleTypeList(
|
|||||||
VkPhysicalDeviceExternalBufferInfo buffer_info = {};
|
VkPhysicalDeviceExternalBufferInfo buffer_info = {};
|
||||||
buffer_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO;
|
buffer_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO;
|
||||||
buffer_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
buffer_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
||||||
|
buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT
|
||||||
|
| VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
||||||
|
|
||||||
VkExternalBufferProperties buffer_properties = {};
|
VkExternalBufferProperties buffer_properties = {};
|
||||||
buffer_properties.sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES;
|
buffer_properties.sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES;
|
||||||
@@ -307,7 +309,9 @@ getSupportedVulkanExternalSemaphoreHandleTypeList(const VulkanDevice &vkDevice)
|
|||||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, nullptr,
|
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, nullptr,
|
||||||
handle_type.vk_type
|
handle_type.vk_type
|
||||||
};
|
};
|
||||||
VkExternalSemaphoreProperties query_result = {};
|
VkExternalSemaphoreProperties query_result = {
|
||||||
|
VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES
|
||||||
|
};
|
||||||
vkGetPhysicalDeviceExternalSemaphorePropertiesKHR(
|
vkGetPhysicalDeviceExternalSemaphorePropertiesKHR(
|
||||||
vkDevice.getPhysicalDevice(), &handle_query, &query_result);
|
vkDevice.getPhysicalDevice(), &handle_query, &query_result);
|
||||||
if (query_result.externalSemaphoreFeatures
|
if (query_result.externalSemaphoreFeatures
|
||||||
|
|||||||
@@ -17,10 +17,31 @@ set(${MODULE_NAME}_SOURCES
|
|||||||
|
|
||||||
include(../CMakeCommon.txt)
|
include(../CMakeCommon.txt)
|
||||||
|
|
||||||
|
# Include the relative paths to SPV assembly files
|
||||||
|
configure_file(spirv_asm_list.txt ${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt)
|
||||||
|
include(${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt)
|
||||||
|
|
||||||
|
# Determine the corresponding binary outputs to the SPV assembly input files
|
||||||
|
set(COMPILER_ASM_REL_PATH spirv_asm)
|
||||||
|
set(COMPILER_ASM_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${COMPILER_ASM_REL_PATH}")
|
||||||
|
set(COMPILER_SPV_PATH "${CMAKE_CURRENT_BINARY_DIR}/spirv_bin")
|
||||||
|
|
||||||
# Copy the required test include directories into the build directory.
|
# Copy the required test include directories into the build directory.
|
||||||
if(NOT DEFINED COMPILER_TEST_RESOURCES)
|
if(NOT DEFINED COMPILER_TEST_RESOURCES)
|
||||||
set(COMPILER_TEST_RESOURCES $<TARGET_FILE_DIR:${${MODULE_NAME}_OUT}>)
|
set(COMPILER_TEST_RESOURCES $<TARGET_FILE_DIR:${${MODULE_NAME}_OUT}>)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
set(COMPILER_SPV_EXTRA "")
|
||||||
|
if(SPIRV_TOOLS_DIR AND IS_ABSOLUTE "${SPIRV_TOOLS_DIR}" AND
|
||||||
|
IS_DIRECTORY "${SPIRV_TOOLS_DIR}")
|
||||||
|
message("Using SPIR-V tools from '${SPIRV_TOOLS_DIR}'")
|
||||||
|
set(COMPILER_SPV_EXTRA "--assembler=${SPIRV_TOOLS_DIR}/spirv-as" "--validator=${SPIRV_TOOLS_DIR}/spirv-val")
|
||||||
|
endif()
|
||||||
|
set(COMPILER_ASSEMBLY_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../spirv_new/spirv_asm/assemble_spirv.py)
|
||||||
|
|
||||||
|
include(CMakePrintHelpers)
|
||||||
|
cmake_print_variables(COMPILER_ASSEMBLY_SCRIPT)
|
||||||
|
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
COMMENT "Copying compiler test resources..."
|
COMMENT "Copying compiler test resources..."
|
||||||
TARGET ${${MODULE_NAME}_OUT}
|
TARGET ${${MODULE_NAME}_OUT}
|
||||||
@@ -30,7 +51,10 @@ add_custom_command(
|
|||||||
${COMPILER_TEST_RESOURCES}/includeTestDirectory
|
${COMPILER_TEST_RESOURCES}/includeTestDirectory
|
||||||
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
||||||
${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory
|
${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory
|
||||||
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory)
|
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory
|
||||||
|
COMMAND ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose
|
||||||
|
DEPENDS ${COMPILER_ASSEMBLY_SCRIPT} ${COMPILER_ASM}
|
||||||
|
VERBATIM)
|
||||||
|
|
||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
|
|
||||||
|
|||||||
4
test_conformance/compiler/spirv_asm_list.txt
Normal file
4
test_conformance/compiler/spirv_asm_list.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
set(COMPILER_SPIRV_NEW_ASM
|
||||||
|
compiler_spirv_asm/write_kernel.spvasm32
|
||||||
|
compiler_spirv_asm/write_kernel.spvasm64
|
||||||
|
)
|
||||||
@@ -14,6 +14,9 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
//
|
//
|
||||||
#include "testBase.h"
|
#include "testBase.h"
|
||||||
|
|
||||||
|
#include <filesystem>
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#elif defined(__linux__) || defined(__APPLE__)
|
#elif defined(__linux__) || defined(__APPLE__)
|
||||||
@@ -3020,15 +3023,6 @@ REGISTER_TEST(execute_after_embedded_header_link)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__APPLE__) || defined(__linux)
|
|
||||||
#define _mkdir(x) mkdir(x, S_IRWXU)
|
|
||||||
#define _chdir chdir
|
|
||||||
#define _rmdir rmdir
|
|
||||||
#define _unlink unlink
|
|
||||||
#else
|
|
||||||
#include <direct.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
REGISTER_TEST(execute_after_included_header_link)
|
REGISTER_TEST(execute_after_included_header_link)
|
||||||
{
|
{
|
||||||
int error;
|
int error;
|
||||||
@@ -3047,100 +3041,60 @@ REGISTER_TEST(execute_after_included_header_link)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* setup */
|
/* setup */
|
||||||
#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
|
std::error_code ec;
|
||||||
/* Some tests systems doesn't allow one to write in the test directory */
|
auto temp_dir_path = std::filesystem::temp_directory_path(ec);
|
||||||
if (_chdir("/tmp") != 0)
|
if (ec)
|
||||||
{
|
{
|
||||||
log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n",
|
log_error("ERROR: Unable to get the temporary directory path\n");
|
||||||
__FILE__, __LINE__);
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#endif
|
temp_dir_path = temp_dir_path / "foo" / "bar";
|
||||||
if (_mkdir("foo") != 0)
|
std::filesystem::create_directories(temp_dir_path, ec);
|
||||||
|
if (ec)
|
||||||
{
|
{
|
||||||
log_error("ERROR: Unable to create directory foo! (in %s:%d)\n",
|
log_error("ERROR: Unable to create directory: %s, error: %d (%s)\n",
|
||||||
__FILE__, __LINE__);
|
temp_dir_path.u8string().c_str(), ec.value(),
|
||||||
|
ec.message().c_str());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (_mkdir("foo/bar") != 0)
|
|
||||||
{
|
const auto simple_header_path = temp_dir_path / simple_header_name;
|
||||||
log_error("ERROR: Unable to create directory foo/bar! (in %s:%d)\n",
|
FILE *simple_header_file =
|
||||||
__FILE__, __LINE__);
|
fopen(simple_header_path.u8string().c_str(), "w");
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (_chdir("foo/bar") != 0)
|
|
||||||
{
|
|
||||||
log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n",
|
|
||||||
__FILE__, __LINE__);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
FILE *simple_header_file = fopen(simple_header_name, "w");
|
|
||||||
if (simple_header_file == NULL)
|
if (simple_header_file == NULL)
|
||||||
{
|
{
|
||||||
log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n",
|
log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n",
|
||||||
simple_header_name, __FILE__, __LINE__);
|
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (fprintf(simple_header_file, "%s", simple_header) < 0)
|
if (fprintf(simple_header_file, "%s", simple_header) < 0)
|
||||||
{
|
{
|
||||||
log_error(
|
log_error(
|
||||||
"ERROR: Unable to write to simple header file %s! (in %s:%d)\n",
|
"ERROR: Unable to write to simple header file %s! (in %s:%d)\n",
|
||||||
simple_header_name, __FILE__, __LINE__);
|
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (fclose(simple_header_file) != 0)
|
if (fclose(simple_header_file) != 0)
|
||||||
{
|
{
|
||||||
log_error("ERROR: Unable to close simple header file %s! (in %s:%d)\n",
|
log_error("ERROR: Unable to close simple header file %s! (in %s:%d)\n",
|
||||||
simple_header_name, __FILE__, __LINE__);
|
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (_chdir("../..") != 0)
|
|
||||||
{
|
const std::string include_path =
|
||||||
log_error("ERROR: Unable to change to original working directory! (in "
|
std::string("-I") + temp_dir_path.generic_u8string();
|
||||||
"%s:%d)\n",
|
error = clCompileProgram(program, 1, &device, include_path.c_str(), 0, NULL,
|
||||||
__FILE__, __LINE__);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
|
|
||||||
error = clCompileProgram(program, 1, &device, "-I/tmp/foo/bar", 0, NULL,
|
|
||||||
NULL, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
#else
|
|
||||||
error = clCompileProgram(program, 1, &device, "-Ifoo/bar", 0, NULL, NULL,
|
|
||||||
NULL, NULL);
|
|
||||||
#endif
|
|
||||||
test_error(error,
|
test_error(error,
|
||||||
"Unable to compile a simple program with included header");
|
"Unable to compile a simple program with included header");
|
||||||
|
|
||||||
/* cleanup */
|
/* cleanup */
|
||||||
if (_chdir("foo/bar") != 0)
|
std::filesystem::remove_all(temp_dir_path, ec);
|
||||||
|
if (ec)
|
||||||
{
|
{
|
||||||
log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n",
|
log_error("ERROR: Unable to delete directory: %s, error: %d (%s)",
|
||||||
__FILE__, __LINE__);
|
temp_dir_path.u8string().c_str(), ec.value(),
|
||||||
return -1;
|
ec.message().c_str());
|
||||||
}
|
|
||||||
if (_unlink(simple_header_name) != 0)
|
|
||||||
{
|
|
||||||
log_error("ERROR: Unable to remove simple header file %s! (in %s:%d)\n",
|
|
||||||
simple_header_name, __FILE__, __LINE__);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (_chdir("../..") != 0)
|
|
||||||
{
|
|
||||||
log_error("ERROR: Unable to change to original working directory! (in "
|
|
||||||
"%s:%d)\n",
|
|
||||||
__FILE__, __LINE__);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (_rmdir("foo/bar") != 0)
|
|
||||||
{
|
|
||||||
log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n",
|
|
||||||
__FILE__, __LINE__);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (_rmdir("foo") != 0)
|
|
||||||
{
|
|
||||||
log_error("ERROR: Unable to remove directory foo! (in %s:%d)\n",
|
|
||||||
__FILE__, __LINE__);
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -95,7 +95,9 @@ const char *known_extensions[] = {
|
|||||||
"cl_khr_command_buffer",
|
"cl_khr_command_buffer",
|
||||||
"cl_khr_command_buffer_mutable_dispatch",
|
"cl_khr_command_buffer_mutable_dispatch",
|
||||||
"cl_khr_command_buffer_multi_device",
|
"cl_khr_command_buffer_multi_device",
|
||||||
"cl_khr_external_memory_android_hardware_buffer"
|
"cl_khr_external_memory_android_hardware_buffer",
|
||||||
|
"cl_khr_unified_svm",
|
||||||
|
"cl_khr_spirv_queries"
|
||||||
};
|
};
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
|
|||||||
@@ -656,6 +656,32 @@ static int test_feature_macro_integer_dot_product_input_4x8bit(
|
|||||||
compiler_status, supported);
|
compiler_status, supported);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int test_feature_macro_ext_image_unorm_int_2_101010(
|
||||||
|
cl_device_id deviceID, cl_context context, std::string test_macro_name,
|
||||||
|
cl_bool& supported)
|
||||||
|
{
|
||||||
|
cl_int error = TEST_FAIL;
|
||||||
|
cl_bool api_status = CL_TRUE;
|
||||||
|
cl_bool compiler_status;
|
||||||
|
log_info("\n%s ...\n", test_macro_name.c_str());
|
||||||
|
|
||||||
|
if (!is_extension_available(deviceID, "cl_ext_image_unorm_int_2_101010"))
|
||||||
|
{
|
||||||
|
supported = false;
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = check_compiler_feature_info(deviceID, context, test_macro_name,
|
||||||
|
compiler_status);
|
||||||
|
if (error != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
return feature_macro_verify_results(test_macro_name, api_status,
|
||||||
|
compiler_status, supported);
|
||||||
|
}
|
||||||
|
|
||||||
static int test_feature_macro_int64(cl_device_id deviceID, cl_context context,
|
static int test_feature_macro_int64(cl_device_id deviceID, cl_context context,
|
||||||
std::string test_macro_name,
|
std::string test_macro_name,
|
||||||
cl_bool& supported)
|
cl_bool& supported)
|
||||||
@@ -833,6 +859,7 @@ REGISTER_TEST_VERSION(features_macro, Version(3, 0))
|
|||||||
NEW_FEATURE_MACRO_TEST(int64);
|
NEW_FEATURE_MACRO_TEST(int64);
|
||||||
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit);
|
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit);
|
||||||
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit_packed);
|
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit_packed);
|
||||||
|
NEW_FEATURE_MACRO_TEST(ext_image_unorm_int_2_101010);
|
||||||
|
|
||||||
error |= test_consistency_c_features_list(device, supported_features_vec);
|
error |= test_consistency_c_features_list(device, supported_features_vec);
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,43 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
const std::string slash = "\\";
|
||||||
|
#else
|
||||||
|
const std::string slash = "/";
|
||||||
|
#endif
|
||||||
|
std::string compilerSpvBinaries = "test_conformance" + slash + "compiler"
|
||||||
|
+ slash + "spirv_bin" + slash + "write_kernel.spv";
|
||||||
|
|
||||||
|
const std::string spvExt = ".spv";
|
||||||
|
|
||||||
|
std::vector<unsigned char> readBinary(const char *file_name)
|
||||||
|
{
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
ifstream file(file_name, ios::in | ios::binary | ios::ate);
|
||||||
|
|
||||||
|
std::vector<char> tmpBuffer(0);
|
||||||
|
|
||||||
|
if (file.is_open())
|
||||||
|
{
|
||||||
|
size_t size = file.tellg();
|
||||||
|
tmpBuffer.resize(size);
|
||||||
|
file.seekg(0, ios::beg);
|
||||||
|
file.read(&tmpBuffer[0], size);
|
||||||
|
file.close();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_error("File %s not found\n", file_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<unsigned char> result(tmpBuffer.begin(), tmpBuffer.end());
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@@ -299,18 +336,12 @@ public:
|
|||||||
throw unload_test_failure("Failure getting device address bits");
|
throw unload_test_failure("Failure getting device address bits");
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (address_bits)
|
std::vector<unsigned char> kernel_buffer;
|
||||||
{
|
|
||||||
case 32:
|
std::string file_name =
|
||||||
m_spirv_binary = write_kernel_32_spv.data();
|
compilerSpvBinaries + std::to_string(address_bits);
|
||||||
m_spirv_size = write_kernel_32_spv.size();
|
m_spirv_binary = readBinary(file_name.c_str());
|
||||||
break;
|
m_spirv_size = m_spirv_binary.size();
|
||||||
case 64:
|
|
||||||
m_spirv_binary = write_kernel_64_spv.data();
|
|
||||||
m_spirv_size = write_kernel_64_spv.size();
|
|
||||||
break;
|
|
||||||
default: throw unload_test_failure("Invalid address bits");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void create() final
|
void create() final
|
||||||
@@ -320,7 +351,7 @@ public:
|
|||||||
assert(nullptr == m_program);
|
assert(nullptr == m_program);
|
||||||
|
|
||||||
cl_int err = CL_INVALID_PLATFORM;
|
cl_int err = CL_INVALID_PLATFORM;
|
||||||
m_program = m_CreateProgramWithIL(m_context, m_spirv_binary,
|
m_program = m_CreateProgramWithIL(m_context, &m_spirv_binary[0],
|
||||||
m_spirv_size, &err);
|
m_spirv_size, &err);
|
||||||
if (CL_SUCCESS != err)
|
if (CL_SUCCESS != err)
|
||||||
throw unload_test_failure("clCreateProgramWithIL()", err);
|
throw unload_test_failure("clCreateProgramWithIL()", err);
|
||||||
@@ -347,7 +378,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void *m_spirv_binary;
|
std::vector<unsigned char> m_spirv_binary;
|
||||||
size_t m_spirv_size;
|
size_t m_spirv_size;
|
||||||
bool m_enabled;
|
bool m_enabled;
|
||||||
|
|
||||||
|
|||||||
@@ -4,47 +4,3 @@ static const char write_kernel_source[] = R"(
|
|||||||
kernel void write_kernel(global unsigned int *p) {
|
kernel void write_kernel(global unsigned int *p) {
|
||||||
*p = 42;
|
*p = 42;
|
||||||
})";
|
})";
|
||||||
|
|
||||||
/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm64 */
|
|
||||||
static std::array<unsigned char, 216> write_kernel_64_spv{
|
|
||||||
{ 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
|
|
||||||
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
|
|
||||||
0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
|
|
||||||
0x0e, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
|
||||||
0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
|
||||||
0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
|
|
||||||
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
|
|
||||||
0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
|
|
||||||
0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
|
||||||
0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
|
||||||
0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
|
||||||
0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
|
|
||||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
|
||||||
0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
|
||||||
0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
|
|
||||||
0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
|
||||||
0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm32 */
|
|
||||||
static std::array<unsigned char, 216> write_kernel_32_spv{
|
|
||||||
{ 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
|
|
||||||
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
|
|
||||||
0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
|
|
||||||
0x0e, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
|
||||||
0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
|
||||||
0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
|
|
||||||
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
|
|
||||||
0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
|
|
||||||
0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
|
||||||
0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
|
||||||
0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
|
||||||
0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
|
|
||||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
|
||||||
0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
|
||||||
0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
|
|
||||||
0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
|
||||||
0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
|
|
||||||
};
|
|
||||||
|
|||||||
@@ -284,6 +284,11 @@ int main( int argc, const char **argv )
|
|||||||
|
|
||||||
static int ParseArgs( int argc, const char **argv )
|
static int ParseArgs( int argc, const char **argv )
|
||||||
{
|
{
|
||||||
|
if (gListTests)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
gArgList = (const char **)calloc( argc, sizeof( char*) );
|
gArgList = (const char **)calloc( argc, sizeof( char*) );
|
||||||
|
|
||||||
if( NULL == gArgList )
|
if( NULL == gArgList )
|
||||||
|
|||||||
@@ -1,9 +1,5 @@
|
|||||||
set(MODULE_NAME CONVERSIONS)
|
set(MODULE_NAME CONVERSIONS)
|
||||||
|
|
||||||
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
|
|
||||||
add_cxx_flag_if_supported(-Wno-narrowing)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set (${MODULE_NAME}_SOURCES
|
set (${MODULE_NAME}_SOURCES
|
||||||
Sleep.cpp test_conversions.cpp basic_test_conversions.cpp
|
Sleep.cpp test_conversions.cpp basic_test_conversions.cpp
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -13,9 +13,11 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
//
|
//
|
||||||
|
#include "harness/mathHelpers.h"
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/compat.h"
|
#include "harness/compat.h"
|
||||||
#include "harness/ThreadPool.h"
|
#include "harness/ThreadPool.h"
|
||||||
|
#include "harness/parseParameters.h"
|
||||||
|
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
#include <sys/sysctl.h>
|
#include <sys/sysctl.h>
|
||||||
@@ -52,17 +54,17 @@
|
|||||||
|
|
||||||
#include "basic_test_conversions.h"
|
#include "basic_test_conversions.h"
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_M_IX86) || defined(_M_X64)
|
||||||
#include <mmintrin.h>
|
#include <mmintrin.h>
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#else // !_WIN32
|
#else
|
||||||
#if defined(__SSE__)
|
#if defined(__SSE__)
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#endif // _WIN32
|
#endif
|
||||||
|
|
||||||
cl_context gContext = NULL;
|
cl_context gContext = NULL;
|
||||||
cl_command_queue gQueue = NULL;
|
cl_command_queue gQueue = NULL;
|
||||||
@@ -76,7 +78,6 @@ cl_mem gInBuffer;
|
|||||||
cl_mem gOutBuffers[kCallStyleCount];
|
cl_mem gOutBuffers[kCallStyleCount];
|
||||||
size_t gComputeDevices = 0;
|
size_t gComputeDevices = 0;
|
||||||
uint32_t gDeviceFrequency = 0;
|
uint32_t gDeviceFrequency = 0;
|
||||||
int gWimpyMode = 0;
|
|
||||||
int gWimpyReductionFactor = 128;
|
int gWimpyReductionFactor = 128;
|
||||||
int gSkipTesting = 0;
|
int gSkipTesting = 0;
|
||||||
int gForceFTZ = 0;
|
int gForceFTZ = 0;
|
||||||
@@ -955,24 +956,6 @@ void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
|
|||||||
// destroyed automatically soon after we exit.
|
// destroyed automatically soon after we exit.
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> static bool isnan_fp(const T &v)
|
|
||||||
{
|
|
||||||
if (std::is_same<T, cl_half>::value)
|
|
||||||
{
|
|
||||||
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
|
||||||
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
|
||||||
return (h_exp == 0x1F && h_mant != 0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
#if !defined(_WIN32)
|
|
||||||
return std::isnan(v);
|
|
||||||
#else
|
|
||||||
return _isnan(v);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename InType>
|
template <typename InType>
|
||||||
void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType, void *input)
|
void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType, void *input)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -80,7 +80,6 @@ extern int gHasDouble;
|
|||||||
extern int gTestDouble;
|
extern int gTestDouble;
|
||||||
extern int gHasHalfs;
|
extern int gHasHalfs;
|
||||||
extern int gTestHalfs;
|
extern int gTestHalfs;
|
||||||
extern int gWimpyMode;
|
|
||||||
extern int gWimpyReductionFactor;
|
extern int gWimpyReductionFactor;
|
||||||
extern int gSkipTesting;
|
extern int gSkipTesting;
|
||||||
extern int gMinVectorSize;
|
extern int gMinVectorSize;
|
||||||
|
|||||||
@@ -343,7 +343,7 @@ float DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int(float f)
|
|||||||
volatile float x = f;
|
volatile float x = f;
|
||||||
float magicVal = magic[f < 0];
|
float magicVal = magic[f < 0];
|
||||||
|
|
||||||
#if defined(__SSE__)
|
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||||
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
||||||
// incorrectly
|
// incorrectly
|
||||||
__m128 v = _mm_set_ss(x);
|
__m128 v = _mm_set_ss(x);
|
||||||
@@ -376,7 +376,7 @@ DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int_and_clamp(double f)
|
|||||||
{
|
{
|
||||||
volatile double x = f;
|
volatile double x = f;
|
||||||
double magicVal = magic[f < 0];
|
double magicVal = magic[f < 0];
|
||||||
#if defined(__SSE2__) || defined(_MSC_VER)
|
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||||
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
||||||
// incorrectly
|
// incorrectly
|
||||||
__m128d v = _mm_set_sd(x);
|
__m128d v = _mm_set_sd(x);
|
||||||
@@ -479,7 +479,7 @@ void DataInfoSpec<InType, OutType, InFP, OutFP>::conv(OutType *out, InType *in)
|
|||||||
{
|
{
|
||||||
if (std::is_same<cl_double, OutType>::value)
|
if (std::is_same<cl_double, OutType>::value)
|
||||||
{
|
{
|
||||||
#if defined(_MSC_VER)
|
#if defined(_M_IX86) || defined(_M_X64)
|
||||||
double result;
|
double result;
|
||||||
|
|
||||||
if (std::is_same<cl_ulong, InType>::value)
|
if (std::is_same<cl_ulong, InType>::value)
|
||||||
|
|||||||
@@ -112,6 +112,35 @@ int main(int argc, const char **argv)
|
|||||||
int error;
|
int error;
|
||||||
|
|
||||||
argc = parseCustomParam(argc, argv);
|
argc = parseCustomParam(argc, argv);
|
||||||
|
if (gListTests)
|
||||||
|
{
|
||||||
|
for (unsigned dst = 0; dst < kTypeCount; dst++)
|
||||||
|
{
|
||||||
|
for (unsigned src = 0; src < kTypeCount; src++)
|
||||||
|
{
|
||||||
|
for (unsigned sat = 0; sat < 2; sat++)
|
||||||
|
{
|
||||||
|
// skip illegal saturated conversions to float type
|
||||||
|
if (gSaturationNames[sat] == std::string("_sat")
|
||||||
|
&& (gTypeNames[dst] == std::string("float")
|
||||||
|
|| gTypeNames[dst] == std::string("half")
|
||||||
|
|| gTypeNames[dst] == std::string("double")))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (unsigned rnd = 0; rnd < kRoundingModeCount; rnd++)
|
||||||
|
{
|
||||||
|
vlog("\t%s\n",
|
||||||
|
(std::string(gTypeNames[dst])
|
||||||
|
+ gSaturationNames[sat] + gRoundingModeNames[rnd]
|
||||||
|
+ "_" + gTypeNames[src])
|
||||||
|
.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
if (argc == -1)
|
if (argc == -1)
|
||||||
{
|
{
|
||||||
return 1;
|
return 1;
|
||||||
@@ -218,7 +247,6 @@ static int ParseArgs(int argc, const char **argv)
|
|||||||
case 'h': gTestHalfs ^= 1; break;
|
case 'h': gTestHalfs ^= 1; break;
|
||||||
case 'l': gSkipTesting ^= 1; break;
|
case 'l': gSkipTesting ^= 1; break;
|
||||||
case 'm': gMultithread ^= 1; break;
|
case 'm': gMultithread ^= 1; break;
|
||||||
case 'w': gWimpyMode ^= 1; break;
|
|
||||||
case '[':
|
case '[':
|
||||||
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
|
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
|
||||||
break;
|
break;
|
||||||
@@ -287,14 +315,6 @@ static int ParseArgs(int argc, const char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for the wimpy mode environment variable
|
|
||||||
if (getenv("CL_WIMPY_MODE"))
|
|
||||||
{
|
|
||||||
vlog("\n");
|
|
||||||
vlog("*** Detected CL_WIMPY_MODE env ***\n");
|
|
||||||
gWimpyMode = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
vlog("\n");
|
vlog("\n");
|
||||||
|
|
||||||
PrintArch();
|
PrintArch();
|
||||||
@@ -335,9 +355,6 @@ static void PrintUsage(void)
|
|||||||
vlog("\t\t-l\tToggle link check mode. When on, testing is skipped, and we "
|
vlog("\t\t-l\tToggle link check mode. When on, testing is skipped, and we "
|
||||||
"just check to see that the kernels build. (Off by default.)\n");
|
"just check to see that the kernels build. (Off by default.)\n");
|
||||||
vlog("\t\t-m\tToggle Multithreading. (On by default.)\n");
|
vlog("\t\t-m\tToggle Multithreading. (On by default.)\n");
|
||||||
vlog("\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very "
|
|
||||||
"small subset of the tests for each fn. NOT A VALID TEST! (Off by "
|
|
||||||
"default.)\n");
|
|
||||||
vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
|
vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
|
||||||
"1-12, default factor(%u)\n",
|
"1-12, default factor(%u)\n",
|
||||||
gWimpyReductionFactor);
|
gWimpyReductionFactor);
|
||||||
|
|||||||
@@ -1,22 +1,4 @@
|
|||||||
if(WIN32)
|
if(WIN32)
|
||||||
|
|
||||||
set(D3D10_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include)
|
|
||||||
|
|
||||||
if(${ARCH} STREQUAL "i686")
|
|
||||||
set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86)
|
|
||||||
endif(${ARCH} STREQUAL "i686")
|
|
||||||
|
|
||||||
if(${ARCH} STREQUAL "x86_64")
|
|
||||||
set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64)
|
|
||||||
endif(${ARCH} STREQUAL "x86_64")
|
|
||||||
|
|
||||||
list(APPEND CLConform_INCLUDE_DIR ${D3D10_INCLUDE_DIR})
|
|
||||||
include_directories (${CLConform_SOURCE_DIR}/test_common/harness
|
|
||||||
${CLConform_INCLUDE_DIR} )
|
|
||||||
link_directories(${CL_LIB_DIR}, ${D3D10_LIB_DIR})
|
|
||||||
|
|
||||||
list(APPEND CLConform_LIBRARIES d3d10 dxgi)
|
|
||||||
|
|
||||||
set(MODULE_NAME D3D10)
|
set(MODULE_NAME D3D10)
|
||||||
|
|
||||||
set(${MODULE_NAME}_SOURCES
|
set(${MODULE_NAME}_SOURCES
|
||||||
@@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES
|
|||||||
harness.cpp
|
harness.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
set_source_files_properties(
|
list(APPEND CLConform_LIBRARIES d3d10 dxgi)
|
||||||
${MODULE_NAME}_SOURCES
|
|
||||||
PROPERTIES LANGUAGE CXX)
|
|
||||||
|
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
|
||||||
include(../CMakeCommon.txt)
|
include(../CMakeCommon.txt)
|
||||||
endif(WIN32)
|
else()
|
||||||
|
message(STATUS "D3D10 tests are only supported on Windows.")
|
||||||
|
endif()
|
||||||
|
|||||||
@@ -1,22 +1,4 @@
|
|||||||
if(WIN32)
|
if(WIN32)
|
||||||
|
|
||||||
set(D3D11_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include)
|
|
||||||
|
|
||||||
if(${ARCH} STREQUAL "i686")
|
|
||||||
set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86)
|
|
||||||
endif(${ARCH} STREQUAL "i686")
|
|
||||||
|
|
||||||
if(${ARCH} STREQUAL "x86_64")
|
|
||||||
set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64)
|
|
||||||
endif(${ARCH} STREQUAL "x86_64")
|
|
||||||
|
|
||||||
list(APPEND CLConform_INCLUDE_DIR ${D3D11_INCLUDE_DIR})
|
|
||||||
include_directories (${CLConform_SOURCE_DIR}/test_common/harness
|
|
||||||
${CLConform_INCLUDE_DIR} )
|
|
||||||
link_directories(${CL_LIB_DIR}, ${D3D11_LIB_DIR})
|
|
||||||
|
|
||||||
list(APPEND CLConform_LIBRARIES d3d11 dxgi)
|
|
||||||
|
|
||||||
set(MODULE_NAME D3D11)
|
set(MODULE_NAME D3D11)
|
||||||
|
|
||||||
set(${MODULE_NAME}_SOURCES
|
set(${MODULE_NAME}_SOURCES
|
||||||
@@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES
|
|||||||
harness.cpp
|
harness.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
set_source_files_properties(
|
list(APPEND CLConform_LIBRARIES d3d11 dxgi)
|
||||||
${MODULE_NAME}_SOURCES
|
|
||||||
PROPERTIES LANGUAGE CXX)
|
|
||||||
|
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
|
||||||
include(../CMakeCommon.txt)
|
include(../CMakeCommon.txt)
|
||||||
endif(WIN32)
|
else()
|
||||||
|
message(STATUS "D3D11 tests are only supported on Windows.")
|
||||||
|
endif()
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
|
#include "harness/parseParameters.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -25,7 +26,6 @@
|
|||||||
|
|
||||||
|
|
||||||
#ifdef CL_VERSION_2_0
|
#ifdef CL_VERSION_2_0
|
||||||
extern int gWimpyMode;
|
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
static const char* enqueue_simple_block[] = { R"(
|
static const char* enqueue_simple_block[] = { R"(
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
|
#include "harness/parseParameters.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -25,7 +26,6 @@
|
|||||||
|
|
||||||
|
|
||||||
#ifdef CL_VERSION_2_0
|
#ifdef CL_VERSION_2_0
|
||||||
extern int gWimpyMode;
|
|
||||||
#define BITS_DEPTH 28
|
#define BITS_DEPTH 28
|
||||||
|
|
||||||
static const char* enqueue_flags_wait_kernel_simple[] =
|
static const char* enqueue_flags_wait_kernel_simple[] =
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
|
#include "harness/parseParameters.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -24,10 +25,7 @@
|
|||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef CL_VERSION_2_0
|
#ifdef CL_VERSION_2_0
|
||||||
extern int gWimpyMode;
|
|
||||||
static const char enqueue_block_multi_queue[] =
|
static const char enqueue_block_multi_queue[] =
|
||||||
NL "#define BLOCK_COMPLETED 0"
|
NL "#define BLOCK_COMPLETED 0"
|
||||||
NL "#define BLOCK_SUBMITTED 1"
|
NL "#define BLOCK_SUBMITTED 1"
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
|
#include "harness/parseParameters.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@@ -26,7 +27,6 @@
|
|||||||
|
|
||||||
|
|
||||||
#ifdef CL_VERSION_2_0
|
#ifdef CL_VERSION_2_0
|
||||||
extern int gWimpyMode;
|
|
||||||
static const char *helper_ndrange_1d_glo[] = {
|
static const char *helper_ndrange_1d_glo[] = {
|
||||||
NL,
|
NL,
|
||||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
|
#include "harness/parseParameters.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -25,7 +26,6 @@
|
|||||||
|
|
||||||
|
|
||||||
#ifdef CL_VERSION_2_0
|
#ifdef CL_VERSION_2_0
|
||||||
extern int gWimpyMode;
|
|
||||||
static int nestingLevel = 3;
|
static int nestingLevel = 3;
|
||||||
|
|
||||||
static const char* enqueue_1D_wg_size_single[] =
|
static const char* enqueue_1D_wg_size_single[] =
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
|
#include "harness/parseParameters.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -25,7 +26,6 @@
|
|||||||
|
|
||||||
|
|
||||||
#ifdef CL_VERSION_2_0
|
#ifdef CL_VERSION_2_0
|
||||||
extern int gWimpyMode;
|
|
||||||
static const char* multi_queue_simple_block1[] =
|
static const char* multi_queue_simple_block1[] =
|
||||||
{
|
{
|
||||||
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
|
#include "harness/parseParameters.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@@ -24,8 +25,6 @@
|
|||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
extern int gWimpyMode;
|
|
||||||
|
|
||||||
#ifdef CL_VERSION_2_0
|
#ifdef CL_VERSION_2_0
|
||||||
|
|
||||||
static const char* enqueue_block_first_kernel[] =
|
static const char* enqueue_block_first_kernel[] =
|
||||||
|
|||||||
@@ -25,7 +25,6 @@
|
|||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
std::string gKernelName;
|
std::string gKernelName;
|
||||||
int gWimpyMode = 0;
|
|
||||||
|
|
||||||
test_status InitCL(cl_device_id device) {
|
test_status InitCL(cl_device_id device) {
|
||||||
auto version = get_device_cl_version(device);
|
auto version = get_device_cl_version(device);
|
||||||
@@ -71,11 +70,6 @@ int main(int argc, const char *argv[])
|
|||||||
gKernelName = std::string(argv[i + 1]);
|
gKernelName = std::string(argv[i + 1]);
|
||||||
argsRemoveNum += 2;
|
argsRemoveNum += 2;
|
||||||
}
|
}
|
||||||
if (strcmp(argv[i], "-w") == 0 ){
|
|
||||||
gWimpyMode = 1;
|
|
||||||
argsRemoveNum += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (argsRemoveNum > 0) {
|
if (argsRemoveNum > 0) {
|
||||||
for (int j = i; j < (argc - argsRemoveNum); ++j)
|
for (int j = i; j < (argc - argsRemoveNum); ++j)
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
|
#include "harness/parseParameters.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -27,7 +28,6 @@
|
|||||||
#ifdef CL_VERSION_2_0
|
#ifdef CL_VERSION_2_0
|
||||||
|
|
||||||
static int gNestingLevel = 4;
|
static int gNestingLevel = 4;
|
||||||
extern int gWimpyMode;
|
|
||||||
|
|
||||||
static const char* enqueue_nested_blocks_single[] =
|
static const char* enqueue_nested_blocks_single[] =
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -5,6 +5,9 @@
|
|||||||
add_subdirectory( cl_ext_cxx_for_opencl )
|
add_subdirectory( cl_ext_cxx_for_opencl )
|
||||||
add_subdirectory( cl_khr_command_buffer )
|
add_subdirectory( cl_khr_command_buffer )
|
||||||
add_subdirectory( cl_khr_dx9_media_sharing )
|
add_subdirectory( cl_khr_dx9_media_sharing )
|
||||||
|
if(ANDROID_PLATFORM GREATER 28)
|
||||||
|
add_subdirectory( cl_khr_external_memory_ahb )
|
||||||
|
endif ()
|
||||||
add_subdirectory( cl_khr_external_memory_dma_buf )
|
add_subdirectory( cl_khr_external_memory_dma_buf )
|
||||||
add_subdirectory( cl_khr_semaphore )
|
add_subdirectory( cl_khr_semaphore )
|
||||||
add_subdirectory( cl_khr_kernel_clock )
|
add_subdirectory( cl_khr_kernel_clock )
|
||||||
|
|||||||
@@ -435,3 +435,40 @@ bool InterleavedEnqueueTest::Skip()
|
|||||||
{
|
{
|
||||||
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
|
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cl_int EnqueueAndReleaseTest::Run()
|
||||||
|
{
|
||||||
|
cl_int error = clCommandNDRangeKernelKHR(
|
||||||
|
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
|
||||||
|
nullptr, 0, nullptr, nullptr, nullptr);
|
||||||
|
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||||
|
|
||||||
|
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||||
|
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||||
|
|
||||||
|
cl_int pattern = 42;
|
||||||
|
error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
|
||||||
|
data_size(), 0, nullptr, nullptr);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed");
|
||||||
|
|
||||||
|
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, nullptr,
|
||||||
|
nullptr);
|
||||||
|
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||||
|
|
||||||
|
// Calls release on cl_command_buffer_khr handle inside wrapper class, and
|
||||||
|
// sets the handle to nullptr, so that release doesn't get called again at
|
||||||
|
// end of test when wrapper object is destroyed.
|
||||||
|
command_buffer.reset();
|
||||||
|
|
||||||
|
std::vector<cl_int> output_data(num_elements);
|
||||||
|
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
|
||||||
|
output_data.data(), 0, nullptr, nullptr);
|
||||||
|
test_error(error, "clEnqueueReadBuffer failed");
|
||||||
|
|
||||||
|
for (size_t i = 0; i < num_elements; i++)
|
||||||
|
{
|
||||||
|
CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|||||||
@@ -128,6 +128,15 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest
|
|||||||
bool Skip() override;
|
bool Skip() override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Test releasing a command-buffer after it has been submitted for execution,
|
||||||
|
// but before the user has waited on completion of the enqueue.
|
||||||
|
struct EnqueueAndReleaseTest : public BasicCommandBufferTest
|
||||||
|
{
|
||||||
|
using BasicCommandBufferTest::BasicCommandBufferTest;
|
||||||
|
|
||||||
|
cl_int Run() override;
|
||||||
|
};
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
int MakeAndRunTest(cl_device_id device, cl_context context,
|
int MakeAndRunTest(cl_device_id device, cl_context context,
|
||||||
cl_command_queue queue, int num_elements)
|
cl_command_queue queue, int num_elements)
|
||||||
|
|||||||
@@ -44,3 +44,9 @@ REGISTER_TEST(explicit_flush)
|
|||||||
return MakeAndRunTest<ExplicitFlushTest>(device, context, queue,
|
return MakeAndRunTest<ExplicitFlushTest>(device, context, queue,
|
||||||
num_elements);
|
num_elements);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(enqueue_and_release)
|
||||||
|
{
|
||||||
|
return MakeAndRunTest<EnqueueAndReleaseTest>(device, context, queue,
|
||||||
|
num_elements);
|
||||||
|
}
|
||||||
|
|||||||
@@ -14,6 +14,8 @@ set(${MODULE_NAME}_SOURCES
|
|||||||
mutable_command_multiple_dispatches.cpp
|
mutable_command_multiple_dispatches.cpp
|
||||||
mutable_command_iterative_arg_update.cpp
|
mutable_command_iterative_arg_update.cpp
|
||||||
mutable_command_work_groups.cpp
|
mutable_command_work_groups.cpp
|
||||||
|
mutable_command_work_dim.cpp
|
||||||
|
mutable_command_update_state.cpp
|
||||||
../basic_command_buffer.cpp
|
../basic_command_buffer.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -135,7 +135,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest
|
|||||||
for (size_t i = 0; i < num_elements; i++)
|
for (size_t i = 0; i < num_elements; i++)
|
||||||
if (i >= update_global_size && global_work_size != resultData[i])
|
if (i >= update_global_size && global_work_size != resultData[i])
|
||||||
{
|
{
|
||||||
log_error("Data failed to verify: update_global_size != "
|
log_error("Data failed to verify: global_work_size != "
|
||||||
"resultData[%zu]=%d\n",
|
"resultData[%zu]=%d\n",
|
||||||
i, resultData[i]);
|
i, resultData[i]);
|
||||||
return TEST_FAIL;
|
return TEST_FAIL;
|
||||||
@@ -154,7 +154,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest
|
|||||||
|
|
||||||
size_t info_global_size = 0;
|
size_t info_global_size = 0;
|
||||||
const size_t update_global_size = 3;
|
const size_t update_global_size = 3;
|
||||||
const size_t sizeToAllocate = global_work_size;
|
const size_t sizeToAllocate = global_work_size * sizeof(cl_int);
|
||||||
const size_t num_elements = sizeToAllocate / sizeof(cl_int);
|
const size_t num_elements = sizeToAllocate / sizeof(cl_int);
|
||||||
cl_mutable_command_khr command = nullptr;
|
cl_mutable_command_khr command = nullptr;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -116,26 +116,6 @@ struct PropertiesArray : public InfoMutableCommandBufferTest
|
|||||||
: InfoMutableCommandBufferTest(device, context, queue)
|
: InfoMutableCommandBufferTest(device, context, queue)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
virtual bool Skip() override
|
|
||||||
{
|
|
||||||
Version device_version = get_device_cl_version(device);
|
|
||||||
if ((device_version >= Version(3, 0))
|
|
||||||
|| is_extension_available(device, "cl_khr_extended_versioning"))
|
|
||||||
{
|
|
||||||
|
|
||||||
cl_version extension_version = get_extension_version(
|
|
||||||
device, "cl_khr_command_buffer_mutable_dispatch");
|
|
||||||
|
|
||||||
if (extension_version != CL_MAKE_VERSION(0, 9, 3))
|
|
||||||
{
|
|
||||||
log_info("cl_khr_command_buffer_mutable_dispatch version 0.9.3 "
|
|
||||||
"is required to run the test, skipping.\n ");
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return InfoMutableCommandBufferTest::Skip();
|
|
||||||
}
|
|
||||||
|
|
||||||
cl_int Run() override
|
cl_int Run() override
|
||||||
{
|
{
|
||||||
cl_command_properties_khr props[] = {
|
cl_command_properties_khr props[] = {
|
||||||
|
|||||||
@@ -297,6 +297,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
|||||||
{
|
{
|
||||||
cl_int offset;
|
cl_int offset;
|
||||||
std::vector<cl_int> output_buffer;
|
std::vector<cl_int> output_buffer;
|
||||||
|
std::vector<cl_int> updated_output_buffer;
|
||||||
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
|
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
|
||||||
clEventWrapper wait_events[3];
|
clEventWrapper wait_events[3];
|
||||||
};
|
};
|
||||||
@@ -337,6 +338,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
|||||||
* buffer_size_multiplier,
|
* buffer_size_multiplier,
|
||||||
nullptr, &error);
|
nullptr, &error);
|
||||||
test_error(error, "clCreateBuffer failed");
|
test_error(error, "clCreateBuffer failed");
|
||||||
|
// Retain new output memory object until the end of the test.
|
||||||
|
retained_output_buffers.push_back(new_out_mem);
|
||||||
|
|
||||||
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
|
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
|
||||||
&new_out_mem };
|
&new_out_mem };
|
||||||
@@ -373,7 +376,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
|||||||
|
|
||||||
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE,
|
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE,
|
||||||
pd.offset * sizeof(cl_int), data_size(),
|
pd.offset * sizeof(cl_int), data_size(),
|
||||||
pd.output_buffer.data(), 1,
|
pd.updated_output_buffer.data(), 1,
|
||||||
&pd.wait_events[2], nullptr);
|
&pd.wait_events[2], nullptr);
|
||||||
test_error(error, "clEnqueueReadBuffer failed");
|
test_error(error, "clEnqueueReadBuffer failed");
|
||||||
|
|
||||||
@@ -388,8 +391,10 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
|||||||
cl_int offset = static_cast<cl_int>(num_elements);
|
cl_int offset = static_cast<cl_int>(num_elements);
|
||||||
|
|
||||||
std::vector<SimulPassData> simul_passes = {
|
std::vector<SimulPassData> simul_passes = {
|
||||||
{ 0, std::vector<cl_int>(num_elements) },
|
{ 0, std::vector<cl_int>(num_elements),
|
||||||
{ offset, std::vector<cl_int>(num_elements) }
|
std::vector<cl_int>(num_elements) },
|
||||||
|
{ offset, std::vector<cl_int>(num_elements),
|
||||||
|
std::vector<cl_int>(num_elements) }
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto&& pass : simul_passes)
|
for (auto&& pass : simul_passes)
|
||||||
@@ -407,13 +412,26 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
|||||||
test_error(error, "clFinish failed");
|
test_error(error, "clFinish failed");
|
||||||
|
|
||||||
// verify the result buffers
|
// verify the result buffers
|
||||||
for (auto&& pass : simul_passes)
|
auto& first_pass_output = simul_passes[0].output_buffer;
|
||||||
{
|
auto& first_pass_updated_output = simul_passes[0].updated_output_buffer;
|
||||||
auto& res_data = pass.output_buffer;
|
auto& second_pass_output = simul_passes[1].output_buffer;
|
||||||
|
auto& second_pass_updated_output =
|
||||||
|
simul_passes[1].updated_output_buffer;
|
||||||
for (size_t i = 0; i < num_elements; i++)
|
for (size_t i = 0; i < num_elements; i++)
|
||||||
{
|
{
|
||||||
CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i);
|
// First pass:
|
||||||
}
|
// Before updating, out_mem is copied from in_mem (pattern_pri)
|
||||||
|
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_output[i], i);
|
||||||
|
// After updating, new_out_mem is copied from in_mem (pattern_pri)
|
||||||
|
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_updated_output[i],
|
||||||
|
i);
|
||||||
|
// Second pass:
|
||||||
|
// Before updating, out_mem is filled with overwritten_pattern
|
||||||
|
CHECK_VERIFICATION_ERROR(overwritten_pattern, second_pass_output[i],
|
||||||
|
i);
|
||||||
|
// After updating, new_out_mem is copied from in_mem (pattern_pri)
|
||||||
|
CHECK_VERIFICATION_ERROR(pattern_pri, second_pass_updated_output[i],
|
||||||
|
i);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
@@ -429,6 +447,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
|||||||
clKernelWrapper kernel_fill;
|
clKernelWrapper kernel_fill;
|
||||||
clProgramWrapper program_fill;
|
clProgramWrapper program_fill;
|
||||||
|
|
||||||
|
std::vector<clMemWrapper> retained_output_buffers;
|
||||||
|
|
||||||
const size_t test_global_work_size = 3 * sizeof(cl_int);
|
const size_t test_global_work_size = 3 * sizeof(cl_int);
|
||||||
const cl_int pattern_pri = 42;
|
const cl_int pattern_pri = 42;
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,280 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2025 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "testHarness.h"
|
||||||
|
#include "mutable_command_basic.h"
|
||||||
|
|
||||||
|
#include <CL/cl_ext.h>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Tests related to ensuring the state of the updated command-buffer is expected
|
||||||
|
// and the effects of operations on it don't have side effects on other objects.
|
||||||
|
//
|
||||||
|
// - Tests the updates applied to a command-buffer persist over all subsequent
|
||||||
|
// enqueues.
|
||||||
|
// - Tests interaction of `clSetKernelArg` with mutable-dispatch extension.
|
||||||
|
|
||||||
|
struct MutableDispatchUpdateStateTest : public BasicMutableCommandBufferTest
|
||||||
|
{
|
||||||
|
MutableDispatchUpdateStateTest(cl_device_id device, cl_context context,
|
||||||
|
cl_command_queue queue)
|
||||||
|
: BasicMutableCommandBufferTest(device, context, queue),
|
||||||
|
buffer(nullptr), command(nullptr)
|
||||||
|
{}
|
||||||
|
|
||||||
|
bool Skip() override
|
||||||
|
{
|
||||||
|
if (BasicMutableCommandBufferTest::Skip()) return true;
|
||||||
|
|
||||||
|
cl_mutable_dispatch_fields_khr mutable_capabilities;
|
||||||
|
bool mutable_support =
|
||||||
|
!clGetDeviceInfo(
|
||||||
|
device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
|
||||||
|
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
|
||||||
|
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
|
||||||
|
return !mutable_support;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_int SetUpKernelArgs() override
|
||||||
|
{
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
|
num_elements * sizeof(cl_int), nullptr, &error);
|
||||||
|
test_error(error, "clCreateBuffer error");
|
||||||
|
|
||||||
|
// Zero initialize buffer
|
||||||
|
const cl_int zero_pattern = 0;
|
||||||
|
error = clEnqueueFillBuffer(
|
||||||
|
queue, buffer, &zero_pattern, sizeof(cl_int), 0,
|
||||||
|
num_elements * sizeof(cl_int), 0, nullptr, nullptr);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed");
|
||||||
|
|
||||||
|
error = clFinish(queue);
|
||||||
|
test_error(error, "clFinish failed");
|
||||||
|
|
||||||
|
error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
|
||||||
|
test_error(error, "Unable to set kernel argument 0");
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_int SetUpKernel() override
|
||||||
|
{
|
||||||
|
const char *add_kernel =
|
||||||
|
R"(
|
||||||
|
__kernel void add_kernel(__global int *data, int value)
|
||||||
|
{
|
||||||
|
size_t tid = get_global_id(0);
|
||||||
|
data[tid] += value;
|
||||||
|
})";
|
||||||
|
|
||||||
|
cl_int error = create_single_kernel_helper(
|
||||||
|
context, &program, &kernel, 1, &add_kernel, "add_kernel");
|
||||||
|
test_error(error, "Creating kernel failed");
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool verify_result(cl_int ref)
|
||||||
|
{
|
||||||
|
std::vector<cl_int> data(num_elements);
|
||||||
|
cl_int error =
|
||||||
|
clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(),
|
||||||
|
data.data(), 0, nullptr, nullptr);
|
||||||
|
test_error(error, "clEnqueueReadBuffer failed");
|
||||||
|
|
||||||
|
for (size_t i = 0; i < num_elements; i++)
|
||||||
|
{
|
||||||
|
if (data[i] != ref)
|
||||||
|
{
|
||||||
|
log_error("Modified verification failed at index %zu: Got %d, "
|
||||||
|
"wanted %d\n",
|
||||||
|
i, data[i], ref);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
clMemWrapper buffer;
|
||||||
|
cl_mutable_command_khr command;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MutableDispatchUpdatesPersistTest : public MutableDispatchUpdateStateTest
|
||||||
|
{
|
||||||
|
MutableDispatchUpdatesPersistTest(cl_device_id device, cl_context context,
|
||||||
|
cl_command_queue queue)
|
||||||
|
: MutableDispatchUpdateStateTest(device, context, queue)
|
||||||
|
{}
|
||||||
|
|
||||||
|
cl_int Run() override
|
||||||
|
{
|
||||||
|
const cl_int original_val = 42;
|
||||||
|
cl_int error =
|
||||||
|
clSetKernelArg(kernel, 1, sizeof(original_val), &original_val);
|
||||||
|
test_error(error, "Unable to set kernel argument 1");
|
||||||
|
|
||||||
|
cl_command_properties_khr props[] = {
|
||||||
|
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
|
||||||
|
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
|
||||||
|
};
|
||||||
|
|
||||||
|
error = clCommandNDRangeKernelKHR(
|
||||||
|
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
|
||||||
|
nullptr, 0, nullptr, nullptr, &command);
|
||||||
|
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||||
|
|
||||||
|
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||||
|
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||||
|
|
||||||
|
// Modify the command buffer before executing
|
||||||
|
const cl_int new_command_val = 5;
|
||||||
|
cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val),
|
||||||
|
&new_command_val };
|
||||||
|
cl_mutable_dispatch_config_khr dispatch_config{
|
||||||
|
command,
|
||||||
|
1 /* num_args */,
|
||||||
|
0 /* num_svm_arg */,
|
||||||
|
0 /* num_exec_infos */,
|
||||||
|
0 /* work_dim - 0 means no change to dimensions */,
|
||||||
|
&arg /* arg_list */,
|
||||||
|
nullptr /* arg_svm_list - nullptr means no change*/,
|
||||||
|
nullptr /* exec_info_list */,
|
||||||
|
nullptr /* global_work_offset */,
|
||||||
|
nullptr /* global_work_size */,
|
||||||
|
nullptr /* local_work_size */
|
||||||
|
};
|
||||||
|
|
||||||
|
cl_uint num_configs = 1;
|
||||||
|
cl_command_buffer_update_type_khr config_types[1] = {
|
||||||
|
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
|
||||||
|
};
|
||||||
|
const void *configs[1] = { &dispatch_config };
|
||||||
|
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
|
||||||
|
config_types, configs);
|
||||||
|
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||||
|
|
||||||
|
const unsigned iterations = 5;
|
||||||
|
for (unsigned i = 0; i < iterations; i++)
|
||||||
|
{
|
||||||
|
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||||
|
nullptr, nullptr);
|
||||||
|
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||||
|
|
||||||
|
error = clFinish(queue);
|
||||||
|
test_error(error, "clFinish failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the results execution sequence is the clEnqueueNDRangeKernel
|
||||||
|
// value + the updated command-buffer value, not using the original
|
||||||
|
// command value in the operation.
|
||||||
|
constexpr cl_int ref = iterations * new_command_val;
|
||||||
|
return verify_result(ref) ? TEST_PASS : TEST_FAIL;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MutableDispatchSetKernelArgTest : public MutableDispatchUpdateStateTest
|
||||||
|
{
|
||||||
|
MutableDispatchSetKernelArgTest(cl_device_id device, cl_context context,
|
||||||
|
cl_command_queue queue)
|
||||||
|
: MutableDispatchUpdateStateTest(device, context, queue)
|
||||||
|
{}
|
||||||
|
|
||||||
|
cl_int Run() override
|
||||||
|
{
|
||||||
|
const cl_int original_val = 42;
|
||||||
|
cl_int error =
|
||||||
|
clSetKernelArg(kernel, 1, sizeof(original_val), &original_val);
|
||||||
|
test_error(error, "Unable to set kernel argument 1");
|
||||||
|
|
||||||
|
cl_command_properties_khr props[] = {
|
||||||
|
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
|
||||||
|
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
|
||||||
|
};
|
||||||
|
|
||||||
|
error = clCommandNDRangeKernelKHR(
|
||||||
|
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
|
||||||
|
nullptr, 0, nullptr, nullptr, &command);
|
||||||
|
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||||
|
|
||||||
|
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||||
|
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||||
|
|
||||||
|
// Set new kernel argument for later clEnqueueNDRangeKernel
|
||||||
|
const cl_int new_eager_val = 10;
|
||||||
|
error =
|
||||||
|
clSetKernelArg(kernel, 1, sizeof(new_eager_val), &new_eager_val);
|
||||||
|
test_error(error, "Unable to set kernel argument 1");
|
||||||
|
|
||||||
|
// Modify the command buffer before executing
|
||||||
|
const cl_int new_command_val = 5;
|
||||||
|
cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val),
|
||||||
|
&new_command_val };
|
||||||
|
cl_mutable_dispatch_config_khr dispatch_config{
|
||||||
|
command,
|
||||||
|
1 /* num_args */,
|
||||||
|
0 /* num_svm_arg */,
|
||||||
|
0 /* num_exec_infos */,
|
||||||
|
0 /* work_dim - 0 means no change to dimensions */,
|
||||||
|
&arg /* arg_list */,
|
||||||
|
nullptr /* arg_svm_list - nullptr means no change*/,
|
||||||
|
nullptr /* exec_info_list */,
|
||||||
|
nullptr /* global_work_offset */,
|
||||||
|
nullptr /* global_work_size */,
|
||||||
|
nullptr /* local_work_size */
|
||||||
|
};
|
||||||
|
|
||||||
|
cl_uint num_configs = 1;
|
||||||
|
cl_command_buffer_update_type_khr config_types[1] = {
|
||||||
|
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
|
||||||
|
};
|
||||||
|
const void *configs[1] = { &dispatch_config };
|
||||||
|
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
|
||||||
|
config_types, configs);
|
||||||
|
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||||
|
|
||||||
|
// Eager kernel enqueue, followed by command-buffer enqueue
|
||||||
|
error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &num_elements,
|
||||||
|
nullptr, 0, nullptr, nullptr);
|
||||||
|
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||||
|
|
||||||
|
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||||
|
nullptr, nullptr);
|
||||||
|
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||||
|
|
||||||
|
// Check the results execution sequence is the clEnqueueNDRangeKernel
|
||||||
|
// value + the updated command-buffer value, not using the original
|
||||||
|
// command value in the operation.
|
||||||
|
constexpr cl_int ref = new_eager_val + new_command_val;
|
||||||
|
return verify_result(ref) ? TEST_PASS : TEST_FAIL;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(mutable_dispatch_updates_persist)
|
||||||
|
{
|
||||||
|
return MakeAndRunTest<MutableDispatchUpdatesPersistTest>(
|
||||||
|
device, context, queue, num_elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(mutable_dispatch_set_kernel_arg)
|
||||||
|
{
|
||||||
|
return MakeAndRunTest<MutableDispatchSetKernelArgTest>(device, context,
|
||||||
|
queue, num_elements);
|
||||||
|
}
|
||||||
@@ -0,0 +1,225 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2025 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <extensionHelpers.h>
|
||||||
|
#include "mutable_command_basic.h"
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include <CL/cl_ext.h>
|
||||||
|
|
||||||
|
// mutable dispatch tests setting `work_dim` to the original 3D value
|
||||||
|
// behaves as expected.
|
||||||
|
|
||||||
|
struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest
|
||||||
|
{
|
||||||
|
using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
|
||||||
|
|
||||||
|
MutableDispatchWorkDim(cl_device_id device, cl_context context,
|
||||||
|
cl_command_queue queue)
|
||||||
|
: InfoMutableCommandBufferTest(device, context, queue)
|
||||||
|
{}
|
||||||
|
|
||||||
|
cl_int SetUp(int elements) override
|
||||||
|
{
|
||||||
|
result_data.resize(update_total_elements);
|
||||||
|
return InfoMutableCommandBufferTest::SetUp(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Skip() override
|
||||||
|
{
|
||||||
|
cl_mutable_dispatch_fields_khr mutable_capabilities;
|
||||||
|
|
||||||
|
bool mutable_support =
|
||||||
|
!clGetDeviceInfo(
|
||||||
|
device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
|
||||||
|
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
|
||||||
|
&& (mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR);
|
||||||
|
|
||||||
|
return !mutable_support || InfoMutableCommandBufferTest::Skip();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Verify(cl_mem buffer, cl_uint expected_value, size_t total_elements)
|
||||||
|
{
|
||||||
|
std::memset(result_data.data(), 0, alloc_size);
|
||||||
|
cl_int error =
|
||||||
|
clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, alloc_size,
|
||||||
|
result_data.data(), 0, nullptr, nullptr);
|
||||||
|
test_error(error, "clEnqueueReadBuffer failed");
|
||||||
|
|
||||||
|
for (size_t i = 0; i < total_elements; i++)
|
||||||
|
{
|
||||||
|
if (result_data[i] != expected_value)
|
||||||
|
{
|
||||||
|
log_error("Data failed to verify at index %zu. "
|
||||||
|
"Expected %u, result was %u\n",
|
||||||
|
i, expected_value, result_data[i]);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_int Run() override
|
||||||
|
{
|
||||||
|
const char *global_size_kernel =
|
||||||
|
R"(
|
||||||
|
__kernel void three_dim(__global uint *dst0,
|
||||||
|
__global uint *dst1,
|
||||||
|
__global uint *dst2)
|
||||||
|
{
|
||||||
|
size_t gid = get_global_linear_id();
|
||||||
|
dst0[gid] = get_global_size(0);
|
||||||
|
dst1[gid] = get_global_size(1);
|
||||||
|
dst2[gid] = get_global_size(2);
|
||||||
|
})";
|
||||||
|
|
||||||
|
cl_int error = create_single_kernel_helper(
|
||||||
|
context, &program, &kernel, 1, &global_size_kernel, "three_dim");
|
||||||
|
test_error(error, "Creating kernel failed");
|
||||||
|
|
||||||
|
// Create a buffer for each of the three dimensions to write the
|
||||||
|
// global size into.
|
||||||
|
clMemWrapper stream1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||||
|
alloc_size, nullptr, &error);
|
||||||
|
test_error(error, "Creating test array failed");
|
||||||
|
|
||||||
|
clMemWrapper stream2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||||
|
alloc_size, nullptr, &error);
|
||||||
|
test_error(error, "Creating test array failed");
|
||||||
|
|
||||||
|
clMemWrapper stream3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||||
|
alloc_size, nullptr, &error);
|
||||||
|
test_error(error, "Creating test array failed");
|
||||||
|
|
||||||
|
// Set the arguments
|
||||||
|
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream1);
|
||||||
|
test_error(error, "Unable to set indexed kernel arguments");
|
||||||
|
|
||||||
|
error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream2);
|
||||||
|
test_error(error, "Unable to set indexed kernel arguments");
|
||||||
|
|
||||||
|
error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &stream3);
|
||||||
|
test_error(error, "Unable to set indexed kernel arguments");
|
||||||
|
|
||||||
|
// Command-buffer contains a single kernel
|
||||||
|
error = clCommandNDRangeKernelKHR(
|
||||||
|
command_buffer, nullptr, nullptr, kernel, work_dim, nullptr,
|
||||||
|
global_size_3D.data(), nullptr, 0, nullptr, nullptr, &command);
|
||||||
|
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||||
|
|
||||||
|
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||||
|
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||||
|
|
||||||
|
// Enqueue command-buffer and wait on completion
|
||||||
|
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||||
|
nullptr, nullptr);
|
||||||
|
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||||
|
|
||||||
|
error = clFinish(queue);
|
||||||
|
test_error(error, "clFinish failed.");
|
||||||
|
|
||||||
|
// Verify results before any update
|
||||||
|
if (!Verify(stream1, global_size_3D[0], original_total_elements))
|
||||||
|
{
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
if (!Verify(stream2, global_size_3D[1], original_total_elements))
|
||||||
|
{
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
if (!Verify(stream3, global_size_3D[2], original_total_elements))
|
||||||
|
{
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update command with a mutable config where we use a different 3D
|
||||||
|
// global size, but hardcode `work_dim` to 3 (the original value).
|
||||||
|
cl_mutable_dispatch_config_khr dispatch_config{
|
||||||
|
command,
|
||||||
|
0 /* num_args */,
|
||||||
|
0 /* num_svm_arg */,
|
||||||
|
0 /* num_exec_infos */,
|
||||||
|
work_dim /* work_dim */,
|
||||||
|
nullptr /* arg_list */,
|
||||||
|
nullptr /* arg_svm_list - nullptr means no change*/,
|
||||||
|
nullptr /* exec_info_list */,
|
||||||
|
nullptr /* global_work_offset */,
|
||||||
|
update_global_size_3D.data() /* global_work_size */,
|
||||||
|
nullptr /* local_work_size */
|
||||||
|
};
|
||||||
|
|
||||||
|
cl_uint num_configs = 1;
|
||||||
|
cl_command_buffer_update_type_khr config_types[1] = {
|
||||||
|
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
|
||||||
|
};
|
||||||
|
const void *configs[1] = { &dispatch_config };
|
||||||
|
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
|
||||||
|
config_types, configs);
|
||||||
|
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||||
|
|
||||||
|
// Enqueue updated command-buffer
|
||||||
|
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||||
|
nullptr, nullptr);
|
||||||
|
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||||
|
|
||||||
|
// Verify update is reflected in buffer output.
|
||||||
|
if (!Verify(stream1, update_global_size_3D[0], update_total_elements))
|
||||||
|
{
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
if (!Verify(stream2, update_global_size_3D[1], update_total_elements))
|
||||||
|
{
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
if (!Verify(stream3, update_global_size_3D[2], update_total_elements))
|
||||||
|
{
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const cl_uint work_dim = 3;
|
||||||
|
// 3D global size of kernel command when created
|
||||||
|
static const size_t original_elements = 2;
|
||||||
|
static constexpr std::array<size_t, work_dim> global_size_3D = {
|
||||||
|
original_elements, original_elements, original_elements
|
||||||
|
};
|
||||||
|
// 3D global size to update kernel command to.
|
||||||
|
static const size_t update_elements = 4;
|
||||||
|
static constexpr std::array<size_t, work_dim> update_global_size_3D = {
|
||||||
|
update_elements, update_elements, update_elements
|
||||||
|
};
|
||||||
|
// Total number of work items in original and updated grids
|
||||||
|
static const size_t original_total_elements =
|
||||||
|
original_elements * original_elements * original_elements;
|
||||||
|
static const size_t update_total_elements =
|
||||||
|
update_elements * update_elements * update_elements;
|
||||||
|
// Size in bytes of each of the 3 cl_mem buffers (using the larger size)
|
||||||
|
static const size_t alloc_size = update_total_elements * sizeof(cl_uint);
|
||||||
|
|
||||||
|
cl_mutable_command_khr command = nullptr;
|
||||||
|
std::vector<cl_uint> result_data;
|
||||||
|
};
|
||||||
|
|
||||||
|
// get_global_linear() used in kernel is an OpenCL 2.0 API
|
||||||
|
REGISTER_TEST_VERSION(mutable_dispatch_work_dim, Version(2, 0))
|
||||||
|
{
|
||||||
|
return MakeAndRunTest<MutableDispatchWorkDim>(device, context, queue,
|
||||||
|
num_elements);
|
||||||
|
}
|
||||||
@@ -250,9 +250,6 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
|
|||||||
&trigger_event, &execute_event);
|
&trigger_event, &execute_event);
|
||||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||||
|
|
||||||
// verify pending state
|
|
||||||
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
|
|
||||||
|
|
||||||
// execute command buffer
|
// execute command buffer
|
||||||
cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
|
cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
|
||||||
|
|
||||||
|
|||||||
@@ -124,8 +124,6 @@ struct EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState
|
|||||||
|
|
||||||
error = EnqueueCommandBuffer();
|
error = EnqueueCommandBuffer();
|
||||||
test_error(error, "EnqueueCommandBuffer failed");
|
test_error(error, "EnqueueCommandBuffer failed");
|
||||||
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
|
|
||||||
test_error(error, "State is not Pending");
|
|
||||||
|
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -89,8 +89,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
|
|||||||
|
|
||||||
error = EnqueueCommandBuffer();
|
error = EnqueueCommandBuffer();
|
||||||
test_error(error, "EnqueueCommandBuffer failed");
|
test_error(error, "EnqueueCommandBuffer failed");
|
||||||
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
|
|
||||||
test_error(error, "State is not Pending");
|
|
||||||
|
|
||||||
error = clFinalizeCommandBufferKHR(command_buffer);
|
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||||
test_failure_error_ret(error, CL_INVALID_OPERATION,
|
test_failure_error_ret(error, CL_INVALID_OPERATION,
|
||||||
|
|||||||
@@ -0,0 +1,12 @@
|
|||||||
|
set(MODULE_NAME CL_KHR_EXTERNAL_MEMORY_AHB)
|
||||||
|
|
||||||
|
set(${MODULE_NAME}_SOURCES
|
||||||
|
main.cpp
|
||||||
|
test_ahb.cpp
|
||||||
|
test_ahb_negative.cpp
|
||||||
|
debug_ahb.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
link_libraries(OpenCL nativewindow)
|
||||||
|
|
||||||
|
include(../../CMakeCommon.txt)
|
||||||
@@ -0,0 +1,193 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2025 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "debug_ahb.h"
|
||||||
|
|
||||||
|
constexpr AHardwareBuffer_UsageFlags flag_list[] = {
|
||||||
|
AHARDWAREBUFFER_USAGE_CPU_READ_RARELY,
|
||||||
|
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN,
|
||||||
|
AHARDWAREBUFFER_USAGE_CPU_WRITE_NEVER,
|
||||||
|
AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY,
|
||||||
|
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN,
|
||||||
|
AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK,
|
||||||
|
AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE,
|
||||||
|
AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER,
|
||||||
|
AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY,
|
||||||
|
AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT,
|
||||||
|
AHARDWAREBUFFER_USAGE_VIDEO_ENCODE,
|
||||||
|
AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA,
|
||||||
|
AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER,
|
||||||
|
AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP,
|
||||||
|
AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE,
|
||||||
|
AHARDWAREBUFFER_USAGE_FRONT_BUFFER,
|
||||||
|
};
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ahardwareBufferDecodeUsageFlagsToString(const AHardwareBuffer_UsageFlags flags)
|
||||||
|
{
|
||||||
|
if (flags == 0)
|
||||||
|
{
|
||||||
|
return "UNKNOWN FLAG";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::string> active_flags;
|
||||||
|
for (const auto flag : flag_list)
|
||||||
|
{
|
||||||
|
if (flag & flags)
|
||||||
|
{
|
||||||
|
active_flags.push_back(ahardwareBufferUsageFlagToString(flag));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (active_flags.empty())
|
||||||
|
{
|
||||||
|
return "UNKNOWN FLAG";
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::accumulate(active_flags.begin() + 1, active_flags.end(),
|
||||||
|
active_flags.front(),
|
||||||
|
[](std::string acc, const std::string& flag) {
|
||||||
|
return std::move(acc) + "|" + flag;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
ahardwareBufferUsageFlagToString(const AHardwareBuffer_UsageFlags flag)
|
||||||
|
{
|
||||||
|
std::string result;
|
||||||
|
switch (flag)
|
||||||
|
{
|
||||||
|
case AHARDWAREBUFFER_USAGE_CPU_READ_NEVER:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_CPU_READ_NEVER";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_CPU_READ_RARELY:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_CPU_READ_RARELY";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_CPU_READ_MASK:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_CPU_READ_MASK";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_VIDEO_ENCODE:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_VIDEO_ENCODE";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE:
|
||||||
|
result = "AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE";
|
||||||
|
break;
|
||||||
|
default: result = "Unknown flag";
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format)
|
||||||
|
{
|
||||||
|
std::string result;
|
||||||
|
switch (format)
|
||||||
|
{
|
||||||
|
case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_BLOB:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_BLOB";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_D16_UNORM:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_D16_UNORM";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_D24_UNORM:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_D24_UNORM";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_D32_FLOAT:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_S8_UINT:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_S8_UINT";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_YCbCr_P010:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_YCbCr_P010";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_YCbCr_P210:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_YCbCr_P210";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_R8_UNORM:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_R8_UNORM";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_R16_UINT:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_R16_UINT";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_R16G16_UINT:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_R16G16_UINT";
|
||||||
|
break;
|
||||||
|
case AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM:
|
||||||
|
result = "AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2025 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <android/hardware_buffer.h>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
|
#define CHECK_AHARDWARE_BUFFER_SUPPORT(ahardwareBuffer_Desc, format) \
|
||||||
|
if (!AHardwareBuffer_isSupported(&ahardwareBuffer_Desc)) \
|
||||||
|
{ \
|
||||||
|
const std::string usage_string = \
|
||||||
|
ahardwareBufferDecodeUsageFlagsToString( \
|
||||||
|
static_cast<AHardwareBuffer_UsageFlags>( \
|
||||||
|
ahardwareBuffer_Desc.usage)); \
|
||||||
|
log_info("Unsupported format %s:\n Usage flags %s\n Size (%u, " \
|
||||||
|
"%u, layers = %u)\n", \
|
||||||
|
ahardwareBufferFormatToString(format.aHardwareBufferFormat) \
|
||||||
|
.c_str(), \
|
||||||
|
usage_string.c_str(), ahardwareBuffer_Desc.width, \
|
||||||
|
ahardwareBuffer_Desc.height, ahardwareBuffer_Desc.layers); \
|
||||||
|
continue; \
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format);
|
||||||
|
std::string ahardwareBufferUsageFlagToString(AHardwareBuffer_UsageFlags flag);
|
||||||
|
std::string
|
||||||
|
ahardwareBufferDecodeUsageFlagsToString(AHardwareBuffer_UsageFlags flags);
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2025 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "harness/testHarness.h"
|
||||||
|
|
||||||
|
int main(int argc, const char *argv[])
|
||||||
|
{
|
||||||
|
return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
|
||||||
|
test_registry::getInstance().definitions(), false, 0);
|
||||||
|
}
|
||||||
1911
test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
Normal file
1911
test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,246 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2025 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "harness/compat.h"
|
||||||
|
#include "harness/kernelHelpers.h"
|
||||||
|
#include "harness/imageHelpers.h"
|
||||||
|
#include "harness/errorHelpers.h"
|
||||||
|
#include <android/hardware_buffer.h>
|
||||||
|
#include "debug_ahb.h"
|
||||||
|
|
||||||
|
REGISTER_TEST(test_buffer_format_negative)
|
||||||
|
{
|
||||||
|
cl_int err = CL_SUCCESS;
|
||||||
|
|
||||||
|
if (!is_extension_available(device, "cl_khr_external_memory"))
|
||||||
|
{
|
||||||
|
log_info("cl_khr_external_memory is not supported on this platform. "
|
||||||
|
"Skipping test.\n");
|
||||||
|
return TEST_SKIPPED_ITSELF;
|
||||||
|
}
|
||||||
|
if (!is_extension_available(
|
||||||
|
device, "cl_khr_external_memory_android_hardware_buffer"))
|
||||||
|
{
|
||||||
|
log_info("cl_khr_external_memory_android_hardware_buffer is not "
|
||||||
|
"supported on this platform. "
|
||||||
|
"Skipping test.\n");
|
||||||
|
return TEST_SKIPPED_ITSELF;
|
||||||
|
}
|
||||||
|
|
||||||
|
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
|
||||||
|
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
|
||||||
|
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
|
||||||
|
aHardwareBufferDesc.width = 64;
|
||||||
|
aHardwareBufferDesc.height = 1;
|
||||||
|
aHardwareBufferDesc.layers = 1;
|
||||||
|
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
|
||||||
|
|
||||||
|
if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
|
||||||
|
{
|
||||||
|
const std::string usage_string =
|
||||||
|
ahardwareBufferDecodeUsageFlagsToString(
|
||||||
|
static_cast<AHardwareBuffer_UsageFlags>(
|
||||||
|
aHardwareBufferDesc.usage));
|
||||||
|
log_info(
|
||||||
|
"Unsupported format %s, usage flags %s\n",
|
||||||
|
ahardwareBufferFormatToString(
|
||||||
|
static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
|
||||||
|
.c_str(),
|
||||||
|
usage_string.c_str());
|
||||||
|
return TEST_SKIPPED_ITSELF;
|
||||||
|
}
|
||||||
|
|
||||||
|
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||||
|
const int ahb_result =
|
||||||
|
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
|
||||||
|
if (ahb_result != 0)
|
||||||
|
{
|
||||||
|
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
log_info("Testing %s\n",
|
||||||
|
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
|
||||||
|
aHardwareBufferDesc.format))
|
||||||
|
.c_str());
|
||||||
|
|
||||||
|
cl_mem_properties props[] = {
|
||||||
|
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||||
|
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||||
|
};
|
||||||
|
|
||||||
|
cl_mem buffer = clCreateBufferWithProperties(
|
||||||
|
context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
|
||||||
|
test_assert_error(err == CL_INVALID_OPERATION,
|
||||||
|
"To create a buffer the aHardwareFormat must be "
|
||||||
|
"AHARDWAREBUFFER_FORMAT_BLOB");
|
||||||
|
|
||||||
|
if (buffer != nullptr)
|
||||||
|
{
|
||||||
|
test_error(clReleaseMemObject(buffer), "Failed to release buffer");
|
||||||
|
}
|
||||||
|
|
||||||
|
AHardwareBuffer_release(aHardwareBuffer);
|
||||||
|
aHardwareBuffer = nullptr;
|
||||||
|
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(test_buffer_size_negative)
|
||||||
|
{
|
||||||
|
cl_int err = CL_SUCCESS;
|
||||||
|
constexpr size_t buffer_size = 64;
|
||||||
|
|
||||||
|
if (!is_extension_available(device, "cl_khr_external_memory"))
|
||||||
|
{
|
||||||
|
log_info("cl_khr_external_memory is not supported on this platform. "
|
||||||
|
"Skipping test.\n");
|
||||||
|
return TEST_SKIPPED_ITSELF;
|
||||||
|
}
|
||||||
|
if (!is_extension_available(
|
||||||
|
device, "cl_khr_external_memory_android_hardware_buffer"))
|
||||||
|
{
|
||||||
|
log_info("cl_khr_external_memory_android_hardware_buffer is not "
|
||||||
|
"supported on this platform. "
|
||||||
|
"Skipping test.\n");
|
||||||
|
return TEST_SKIPPED_ITSELF;
|
||||||
|
}
|
||||||
|
|
||||||
|
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
|
||||||
|
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
|
||||||
|
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
|
||||||
|
aHardwareBufferDesc.width = buffer_size;
|
||||||
|
aHardwareBufferDesc.height = 1;
|
||||||
|
aHardwareBufferDesc.layers = 1;
|
||||||
|
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
|
||||||
|
|
||||||
|
if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
|
||||||
|
{
|
||||||
|
const std::string usage_string =
|
||||||
|
ahardwareBufferDecodeUsageFlagsToString(
|
||||||
|
static_cast<AHardwareBuffer_UsageFlags>(
|
||||||
|
aHardwareBufferDesc.usage));
|
||||||
|
log_info(
|
||||||
|
"Unsupported format %s, usage flags %s\n",
|
||||||
|
ahardwareBufferFormatToString(
|
||||||
|
static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
|
||||||
|
.c_str(),
|
||||||
|
usage_string.c_str());
|
||||||
|
return TEST_SKIPPED_ITSELF;
|
||||||
|
}
|
||||||
|
|
||||||
|
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||||
|
const int ahb_result =
|
||||||
|
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
|
||||||
|
if (ahb_result != 0)
|
||||||
|
{
|
||||||
|
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
log_info("Testing %s\n",
|
||||||
|
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
|
||||||
|
aHardwareBufferDesc.format))
|
||||||
|
.c_str());
|
||||||
|
|
||||||
|
cl_mem_properties props[] = {
|
||||||
|
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||||
|
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||||
|
};
|
||||||
|
|
||||||
|
cl_mem buffer = clCreateBufferWithProperties(
|
||||||
|
context, props, CL_MEM_READ_WRITE, buffer_size / 2, nullptr, &err);
|
||||||
|
test_assert_error(err == CL_INVALID_BUFFER_SIZE,
|
||||||
|
"Wrong error value returned");
|
||||||
|
|
||||||
|
if (buffer != nullptr)
|
||||||
|
{
|
||||||
|
test_error(clReleaseMemObject(buffer), "Failed to release buffer");
|
||||||
|
}
|
||||||
|
|
||||||
|
AHardwareBuffer_release(aHardwareBuffer);
|
||||||
|
aHardwareBuffer = nullptr;
|
||||||
|
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_TEST(test_images_negative)
|
||||||
|
{
|
||||||
|
cl_int err = CL_SUCCESS;
|
||||||
|
|
||||||
|
if (!is_extension_available(device, "cl_khr_external_memory"))
|
||||||
|
{
|
||||||
|
log_info("cl_khr_external_memory is not supported on this platform. "
|
||||||
|
"Skipping test.\n");
|
||||||
|
return TEST_SKIPPED_ITSELF;
|
||||||
|
}
|
||||||
|
if (!is_extension_available(
|
||||||
|
device, "cl_khr_external_memory_android_hardware_buffer"))
|
||||||
|
{
|
||||||
|
log_info("cl_khr_external_memory_android_hardware_buffer is not "
|
||||||
|
"supported on this platform. "
|
||||||
|
"Skipping test.\n");
|
||||||
|
return TEST_SKIPPED_ITSELF;
|
||||||
|
}
|
||||||
|
|
||||||
|
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
|
||||||
|
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
|
||||||
|
aHardwareBufferDesc.usage = static_cast<AHardwareBuffer_UsageFlags>(
|
||||||
|
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
|
||||||
|
| AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
|
||||||
|
| AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE
|
||||||
|
| AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER);
|
||||||
|
aHardwareBufferDesc.width = 64;
|
||||||
|
aHardwareBufferDesc.height = 64;
|
||||||
|
aHardwareBufferDesc.layers = 1;
|
||||||
|
|
||||||
|
AHardwareBuffer *aHardwareBuffer = nullptr;
|
||||||
|
int ahb_result =
|
||||||
|
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
|
||||||
|
if (ahb_result != 0)
|
||||||
|
{
|
||||||
|
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
|
const cl_mem_properties props[] = {
|
||||||
|
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
|
||||||
|
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 };
|
||||||
|
cl_mem image =
|
||||||
|
clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE,
|
||||||
|
&image_format, nullptr, nullptr, &err);
|
||||||
|
test_assert_error(err == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
|
||||||
|
"Wrong error value returned");
|
||||||
|
if (image != nullptr)
|
||||||
|
{
|
||||||
|
test_error(clReleaseMemObject(image), "Failed to release image");
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, 64, 64 };
|
||||||
|
image = clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE,
|
||||||
|
nullptr, &image_desc, nullptr, &err);
|
||||||
|
test_assert_error(err == CL_INVALID_IMAGE_DESCRIPTOR,
|
||||||
|
"Wrong error value returned");
|
||||||
|
if (image != nullptr)
|
||||||
|
{
|
||||||
|
test_error(clReleaseMemObject(image), "Failed to release image");
|
||||||
|
}
|
||||||
|
AHardwareBuffer_release(aHardwareBuffer);
|
||||||
|
aHardwareBuffer = nullptr;
|
||||||
|
|
||||||
|
return TEST_PASS;
|
||||||
|
}
|
||||||
@@ -388,109 +388,6 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
|
|||||||
return TEST_PASS;
|
return TEST_PASS;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Confirm that signal a semaphore with no event dependencies will not result
|
|
||||||
// in an implicit dependency on everything previously submitted
|
|
||||||
REGISTER_TEST_VERSION(external_semaphores_simple_2, Version(1, 2))
|
|
||||||
{
|
|
||||||
REQUIRE_EXTENSION("cl_khr_external_semaphore");
|
|
||||||
|
|
||||||
if (init_vulkan_device(1, &device))
|
|
||||||
{
|
|
||||||
log_info("Cannot initialise Vulkan. "
|
|
||||||
"Skipping test.\n");
|
|
||||||
return TEST_SKIPPED_ITSELF;
|
|
||||||
}
|
|
||||||
|
|
||||||
VulkanDevice vkDevice;
|
|
||||||
|
|
||||||
// Obtain pointers to semaphore's API
|
|
||||||
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
|
|
||||||
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
|
|
||||||
|
|
||||||
std::vector<VulkanExternalSemaphoreHandleType>
|
|
||||||
vkExternalSemaphoreHandleTypeList =
|
|
||||||
getSupportedInteropExternalSemaphoreHandleTypes(device, vkDevice);
|
|
||||||
|
|
||||||
if (vkExternalSemaphoreHandleTypeList.empty())
|
|
||||||
{
|
|
||||||
test_fail("No external semaphore handle types found\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
|
|
||||||
vkExternalSemaphoreHandleTypeList)
|
|
||||||
{
|
|
||||||
log_info_semaphore_type(vkExternalSemaphoreHandleType);
|
|
||||||
VulkanSemaphore vkVk2CLSemaphore(vkDevice,
|
|
||||||
vkExternalSemaphoreHandleType);
|
|
||||||
|
|
||||||
auto sema_ext = clExternalImportableSemaphore(
|
|
||||||
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, device);
|
|
||||||
|
|
||||||
cl_int err = CL_SUCCESS;
|
|
||||||
|
|
||||||
// Create ooo queue
|
|
||||||
clCommandQueueWrapper queue = clCreateCommandQueue(
|
|
||||||
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
|
|
||||||
test_error(err, "Could not create command queue");
|
|
||||||
|
|
||||||
// Create user event
|
|
||||||
clEventWrapper user_event = clCreateUserEvent(context, &err);
|
|
||||||
test_error(err, "Could not create user event");
|
|
||||||
|
|
||||||
// Create Kernel
|
|
||||||
clProgramWrapper program;
|
|
||||||
clKernelWrapper kernel;
|
|
||||||
err = create_single_kernel_helper(context, &program, &kernel, 1,
|
|
||||||
&source, "empty");
|
|
||||||
test_error(err, "Could not create kernel");
|
|
||||||
|
|
||||||
// Enqueue task_1 (dependency on user_event)
|
|
||||||
clEventWrapper task_1_event;
|
|
||||||
err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event);
|
|
||||||
test_error(err, "Could not enqueue task 1");
|
|
||||||
|
|
||||||
// Signal semaphore
|
|
||||||
clEventWrapper signal_event;
|
|
||||||
err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
|
|
||||||
nullptr, 0, nullptr, &signal_event);
|
|
||||||
test_error(err, "Could not signal semaphore");
|
|
||||||
|
|
||||||
// Wait semaphore
|
|
||||||
clEventWrapper wait_event;
|
|
||||||
err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
|
|
||||||
nullptr, 0, nullptr, &wait_event);
|
|
||||||
test_error(err, "Could not wait semaphore");
|
|
||||||
|
|
||||||
// Flush and delay
|
|
||||||
err = clFlush(queue);
|
|
||||||
test_error(err, "Could not flush queue");
|
|
||||||
|
|
||||||
cl_event event_list[] = { signal_event, wait_event };
|
|
||||||
err = clWaitForEvents(2, event_list);
|
|
||||||
test_error(err, "Could not wait on events");
|
|
||||||
|
|
||||||
// Ensure all events are completed except for task_1
|
|
||||||
test_assert_event_inprogress(task_1_event);
|
|
||||||
test_assert_event_complete(signal_event);
|
|
||||||
test_assert_event_complete(wait_event);
|
|
||||||
|
|
||||||
// Complete user_event
|
|
||||||
err = clSetUserEventStatus(user_event, CL_COMPLETE);
|
|
||||||
test_error(err, "Could not set user event to CL_COMPLETE");
|
|
||||||
|
|
||||||
// Finish
|
|
||||||
err = clFinish(queue);
|
|
||||||
test_error(err, "Could not finish queue");
|
|
||||||
|
|
||||||
// Ensure all events are completed
|
|
||||||
test_assert_event_complete(task_1_event);
|
|
||||||
test_assert_event_complete(signal_event);
|
|
||||||
test_assert_event_complete(wait_event);
|
|
||||||
}
|
|
||||||
|
|
||||||
return TEST_PASS;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Confirm that a semaphore can be reused multiple times
|
// Confirm that a semaphore can be reused multiple times
|
||||||
REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
|
REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -76,87 +76,6 @@ struct SimpleSemaphore1 : public SemaphoreTestBase
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SimpleSemaphore2 : public SemaphoreTestBase
|
|
||||||
{
|
|
||||||
SimpleSemaphore2(cl_device_id device, cl_context context,
|
|
||||||
cl_command_queue queue, cl_int nelems)
|
|
||||||
: SemaphoreTestBase(device, context, queue, nelems)
|
|
||||||
{}
|
|
||||||
|
|
||||||
cl_int Run() override
|
|
||||||
{
|
|
||||||
cl_int err = CL_SUCCESS;
|
|
||||||
// Create ooo queue
|
|
||||||
clCommandQueueWrapper queue = clCreateCommandQueue(
|
|
||||||
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
|
|
||||||
test_error(err, "Could not create command queue");
|
|
||||||
|
|
||||||
// Create semaphore
|
|
||||||
cl_semaphore_properties_khr sema_props[] = {
|
|
||||||
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
|
|
||||||
static_cast<cl_semaphore_properties_khr>(
|
|
||||||
CL_SEMAPHORE_TYPE_BINARY_KHR),
|
|
||||||
0
|
|
||||||
};
|
|
||||||
semaphore =
|
|
||||||
clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
|
|
||||||
test_error(err, "Could not create semaphore");
|
|
||||||
|
|
||||||
// Create user event
|
|
||||||
clEventWrapper user_event = clCreateUserEvent(context, &err);
|
|
||||||
test_error(err, "Could not create user event");
|
|
||||||
|
|
||||||
// Create Kernel
|
|
||||||
clProgramWrapper program;
|
|
||||||
clKernelWrapper kernel;
|
|
||||||
err = create_single_kernel_helper(context, &program, &kernel, 1,
|
|
||||||
&source, "empty");
|
|
||||||
test_error(err, "Could not create kernel");
|
|
||||||
|
|
||||||
// Enqueue task_1 (dependency on user_event)
|
|
||||||
clEventWrapper task_1_event;
|
|
||||||
err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event);
|
|
||||||
test_error(err, "Could not enqueue task 1");
|
|
||||||
|
|
||||||
// Signal semaphore
|
|
||||||
clEventWrapper signal_event;
|
|
||||||
err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
|
|
||||||
nullptr, &signal_event);
|
|
||||||
test_error(err, "Could not signal semaphore");
|
|
||||||
|
|
||||||
// Wait semaphore
|
|
||||||
clEventWrapper wait_event;
|
|
||||||
err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
|
|
||||||
nullptr, &wait_event);
|
|
||||||
test_error(err, "Could not wait semaphore");
|
|
||||||
|
|
||||||
// Flush and delay
|
|
||||||
err = clFlush(queue);
|
|
||||||
test_error(err, "Could not flush queue");
|
|
||||||
std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
|
|
||||||
|
|
||||||
// Ensure all events are completed except for task_1
|
|
||||||
test_assert_event_inprogress(task_1_event);
|
|
||||||
test_assert_event_complete(signal_event);
|
|
||||||
test_assert_event_complete(wait_event);
|
|
||||||
|
|
||||||
// Complete user_event
|
|
||||||
err = clSetUserEventStatus(user_event, CL_COMPLETE);
|
|
||||||
test_error(err, "Could not set user event to CL_COMPLETE");
|
|
||||||
|
|
||||||
// Finish
|
|
||||||
err = clFinish(queue);
|
|
||||||
test_error(err, "Could not finish queue");
|
|
||||||
|
|
||||||
// Ensure all events are completed
|
|
||||||
test_assert_event_complete(task_1_event);
|
|
||||||
test_assert_event_complete(signal_event);
|
|
||||||
test_assert_event_complete(wait_event);
|
|
||||||
|
|
||||||
return CL_SUCCESS;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SemaphoreReuse : public SemaphoreTestBase
|
struct SemaphoreReuse : public SemaphoreTestBase
|
||||||
{
|
{
|
||||||
SemaphoreReuse(cl_device_id device, cl_context context,
|
SemaphoreReuse(cl_device_id device, cl_context context,
|
||||||
@@ -387,14 +306,6 @@ REGISTER_TEST_VERSION(semaphores_simple_1, Version(1, 2))
|
|||||||
num_elements);
|
num_elements);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Confirm that signal a semaphore with no event dependencies will not result
|
|
||||||
// in an implicit dependency on everything previously submitted
|
|
||||||
REGISTER_TEST_VERSION(semaphores_simple_2, Version(1, 2))
|
|
||||||
{
|
|
||||||
return MakeAndRunTest<SimpleSemaphore2>(device, context, queue,
|
|
||||||
num_elements);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Confirm that a semaphore can be reused multiple times
|
// Confirm that a semaphore can be reused multiple times
|
||||||
REGISTER_TEST_VERSION(semaphores_reuse, Version(1, 2))
|
REGISTER_TEST_VERSION(semaphores_reuse, Version(1, 2))
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
#include "harness/compat.h"
|
#include "harness/compat.h"
|
||||||
#include "harness/kernelHelpers.h"
|
#include "harness/kernelHelpers.h"
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
|
#include "harness/parseParameters.h"
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
|||||||
@@ -58,7 +58,6 @@ uint32_t gDeviceFrequency = 0;
|
|||||||
uint32_t gComputeDevices = 0;
|
uint32_t gComputeDevices = 0;
|
||||||
size_t gMaxThreadGroupSize = 0;
|
size_t gMaxThreadGroupSize = 0;
|
||||||
size_t gWorkGroupSize = 0;
|
size_t gWorkGroupSize = 0;
|
||||||
bool gWimpyMode = false;
|
|
||||||
int gWimpyReductionFactor = 512;
|
int gWimpyReductionFactor = 512;
|
||||||
int gTestDouble = 0;
|
int gTestDouble = 0;
|
||||||
bool gHostReset = false;
|
bool gHostReset = false;
|
||||||
|
|||||||
@@ -74,7 +74,6 @@ extern bool gHostReset;
|
|||||||
// gWimpyMode indicates if we run the test in wimpy mode where we limit the
|
// gWimpyMode indicates if we run the test in wimpy mode where we limit the
|
||||||
// size of 32 bit ranges to a much smaller set. This is meant to be used
|
// size of 32 bit ranges to a much smaller set. This is meant to be used
|
||||||
// as a smoke test
|
// as a smoke test
|
||||||
extern bool gWimpyMode;
|
|
||||||
extern int gWimpyReductionFactor;
|
extern int gWimpyReductionFactor;
|
||||||
|
|
||||||
uint64_t ReadTime( void );
|
uint64_t ReadTime( void );
|
||||||
|
|||||||
@@ -83,13 +83,6 @@ int main (int argc, const char **argv )
|
|||||||
if( (error = ParseArgs( argc, argv )) )
|
if( (error = ParseArgs( argc, argv )) )
|
||||||
goto exit;
|
goto exit;
|
||||||
|
|
||||||
if (gIsEmbedded) {
|
|
||||||
vlog( "\tProfile: Embedded\n" );
|
|
||||||
}else
|
|
||||||
{
|
|
||||||
vlog( "\tProfile: Full\n" );
|
|
||||||
}
|
|
||||||
|
|
||||||
fflush( stdout );
|
fflush( stdout );
|
||||||
error = runTestHarnessWithCheck(
|
error = runTestHarnessWithCheck(
|
||||||
argCount, argList, test_registry::getInstance().num_tests(),
|
argCount, argList, test_registry::getInstance().num_tests(),
|
||||||
@@ -114,6 +107,10 @@ exit:
|
|||||||
|
|
||||||
static int ParseArgs( int argc, const char **argv )
|
static int ParseArgs( int argc, const char **argv )
|
||||||
{
|
{
|
||||||
|
if (gListTests)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
int i;
|
int i;
|
||||||
argList = (const char **)calloc(argc, sizeof(char *));
|
argList = (const char **)calloc(argc, sizeof(char *));
|
||||||
if( NULL == argList )
|
if( NULL == argList )
|
||||||
@@ -181,9 +178,6 @@ static int ParseArgs( int argc, const char **argv )
|
|||||||
|
|
||||||
case 'r': gHostReset = true; break;
|
case 'r': gHostReset = true; break;
|
||||||
|
|
||||||
case 'w': // Wimpy mode
|
|
||||||
gWimpyMode = true;
|
|
||||||
break;
|
|
||||||
case '[':
|
case '[':
|
||||||
parseWimpyReductionFactor( arg, gWimpyReductionFactor);
|
parseWimpyReductionFactor( arg, gWimpyReductionFactor);
|
||||||
break;
|
break;
|
||||||
@@ -202,12 +196,6 @@ static int ParseArgs( int argc, const char **argv )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getenv("CL_WIMPY_MODE")) {
|
|
||||||
vlog( "\n" );
|
|
||||||
vlog( "*** Detected CL_WIMPY_MODE env ***\n" );
|
|
||||||
gWimpyMode = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
PrintArch();
|
PrintArch();
|
||||||
if( gWimpyMode )
|
if( gWimpyMode )
|
||||||
{
|
{
|
||||||
@@ -217,6 +205,16 @@ static int ParseArgs( int argc, const char **argv )
|
|||||||
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
|
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
|
||||||
vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
|
vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (gIsEmbedded)
|
||||||
|
{
|
||||||
|
vlog("\tProfile: Embedded\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog("\tProfile: Full\n");
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -227,7 +225,6 @@ static void PrintUsage( void )
|
|||||||
"supported)\n");
|
"supported)\n");
|
||||||
vlog("\t\t-t\tToggle reporting performance data.\n");
|
vlog("\t\t-t\tToggle reporting performance data.\n");
|
||||||
vlog("\t\t-r\tReset buffers on host instead of on device.\n");
|
vlog("\t\t-r\tReset buffers on host instead of on device.\n");
|
||||||
vlog("\t\t-w\tRun in wimpy mode\n");
|
|
||||||
vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
|
vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
|
||||||
"1-12, default factor(%u)\n",
|
"1-12, default factor(%u)\n",
|
||||||
gWimpyReductionFactor);
|
gWimpyReductionFactor);
|
||||||
|
|||||||
@@ -248,7 +248,7 @@ clMemWrapper create_image(cl_context context, cl_command_queue queue,
|
|||||||
cl_mem_flags buffer_flags = CL_MEM_READ_WRITE;
|
cl_mem_flags buffer_flags = CL_MEM_READ_WRITE;
|
||||||
if (enable_pitch)
|
if (enable_pitch)
|
||||||
{
|
{
|
||||||
if (version.major() == 1)
|
if (version.get_major() == 1)
|
||||||
{
|
{
|
||||||
host_ptr = malloc(imageInfo->rowPitch);
|
host_ptr = malloc(imageInfo->rowPitch);
|
||||||
}
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user