Merge branch 'main' into cl_khr_unified_svm

This commit is contained in:
Ben Ashbaugh
2025-09-09 17:39:11 -07:00
152 changed files with 6767 additions and 1914 deletions

View File

@@ -10,7 +10,7 @@ jobs:
matrix:
build-type: [Release]
gl: [0]
os: [ubuntu-22.04, macos-latest, windows-latest]
os: [ubuntu-22.04, macos-latest, windows-latest, windows-11-arm]
include:
- os: ubuntu-22.04
gl: 1
@@ -28,7 +28,7 @@ jobs:
arch: android-aarch64
android_arch_abi: arm64-v8a
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- name: Setup Ninja
uses: seanmiddleditch/gha-setup-ninja@master
- name: Install Arm and AArch64 compilers
@@ -62,6 +62,10 @@ jobs:
git checkout cl_khr_unified_svm
ln -s CL OpenCL # For OSX builds
cd ..
- name: Fetch SPIR-V Headers
shell: bash
run: |
git clone https://github.com/KhronosGroup/SPIRV-Headers.git
- name: Install Vulkan SDK
uses: humbletim/install-vulkan-sdk@main
with:
@@ -70,12 +74,13 @@ jobs:
- name: Install Android NDK
if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }}
run: |
wget https://dl.google.com/android/repository/android-ndk-r27c-linux.zip -O android-ndk.zip
wget https://dl.google.com/android/repository/android-ndk-r28c-linux.zip -O android-ndk.zip
unzip android-ndk.zip -d $HOME
export ANDROID_NDK=$HOME/android-ndk-r27c
export ANDROID_NDK=$HOME/android-ndk-r28c
echo "ANDROID_NDK=$ANDROID_NDK" >> $GITHUB_ENV
export ANDROID_ARCH_ABI=${{ matrix.android_arch_abi }}
echo "ANDROID_ARCH_ABI=$ANDROID_ARCH_ABI" >> $GITHUB_ENV
echo "ANDROID_PLATFORM=29" >> $GITHUB_ENV
- name: Prepare CMake Toolchain file
shell: bash
run: |
@@ -108,7 +113,7 @@ jobs:
if: ${{ matrix.arch == 'android-arm' || matrix.arch == 'android-aarch64' }}
shell: bash
run: |
echo "CMAKE_CONFIG_ARGS_ANDROID=-DCMAKE_ANDROID_ARCH_ABI=${ANDROID_ARCH_ABI}" >> $GITHUB_ENV
echo "CMAKE_ADDITIONAL_CONFIG_ARGS=-DCMAKE_ANDROID_ARCH_ABI=${ANDROID_ARCH_ABI} -DANDROID_PLATFORM=${ANDROID_PLATFORM}" >> $GITHUB_ENV
- name: Fetch and build OpenCL ICD Loader
shell: bash
run: |
@@ -120,7 +125,7 @@ jobs:
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
-DOPENCL_ICD_LOADER_HEADERS_DIR='${{ github.workspace }}'/OpenCL-Headers/ \
"${CMAKE_CONFIG_ARGS_ANDROID}"
${CMAKE_ADDITIONAL_CONFIG_ARGS}
cmake --build . --parallel
- name: Fetch Vulkan Headers
shell: bash
@@ -150,6 +155,7 @@ jobs:
cd build
if [[ ${RUNNER_OS} == "Windows" ]]; then
CMAKE_OPENCL_LIBRARIES_OPTION="OpenCL"
CMAKE_ADDITIONAL_CONFIG_ARGS="-DD3D10_IS_SUPPORTED=ON -DD3D11_IS_SUPPORTED=ON"
else
CMAKE_OPENCL_LIBRARIES_OPTION="-lOpenCL"
if [[ '${{ matrix.arch }}' != android-* ]]; then
@@ -158,8 +164,10 @@ jobs:
fi
cmake .. -G Ninja \
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
-DCMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache" \
-DCMAKE_C_COMPILER_LAUNCHER=sccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
-DCL_INCLUDE_DIR='${{ github.workspace }}'/OpenCL-Headers \
-DSPIRV_INCLUDE_DIR='${{ github.workspace }}'/SPIRV-Headers \
-DCL_LIB_DIR='${{ github.workspace }}'/OpenCL-ICD-Loader/build \
-DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
-DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \
@@ -169,7 +177,7 @@ jobs:
-DVULKAN_IS_SUPPORTED=ON \
-DVULKAN_INCLUDE_DIR='${{ github.workspace }}'/Vulkan-Headers/include/ \
-DVULKAN_LIB_DIR='${{ github.workspace }}'/Vulkan-Loader/build/loader/ \
"${CMAKE_CONFIG_ARGS_ANDROID}"
${CMAKE_ADDITIONAL_CONFIG_ARGS}
cmake --build . --parallel
formatcheck:
name: Check code format
@@ -177,7 +185,7 @@ jobs:
steps:
- name: Install packages
run: sudo apt install -y clang-format clang-format-14
- uses: actions/checkout@v4
- uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Check code format

View File

@@ -59,6 +59,12 @@ else(CL_INCLUDE_DIR AND CL_LIB_DIR)
message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR and -DCL_LIB_DIR")
endif(CL_INCLUDE_DIR AND CL_LIB_DIR)
# SPIRV_INCLUDE_DIR - path to dir with SPIR-V headers
if(NOT SPIRV_INCLUDE_DIR)
message(STATUS "SPIR-V headers haven't been found!")
message(FATAL_ERROR "Pass -DSPIRV_INCLUDE_DIR")
endif(NOT SPIRV_INCLUDE_DIR)
# CLConform_GL_LIBRARIES_DIR - path to OpenGL libraries
if(GL_IS_SUPPORTED AND CLConform_GL_LIBRARIES_DIR)
link_directories(${CLConform_GL_LIBRARIES_DIR})
@@ -195,6 +201,7 @@ if(APPLE)
endif(APPLE)
include_directories(SYSTEM ${CL_INCLUDE_DIR})
include_directories(SYSTEM ${SPIRV_INCLUDE_DIR}/include)
include_directories(${CLConform_SOURCE_DIR}/test_common/harness
${CLConform_SOURCE_DIR}/test_common/gles
${CLConform_SOURCE_DIR}/test_common/gl

View File

@@ -13,6 +13,8 @@ Compiling the CTS requires the following CMake configuration options to be set:
* `CL_INCLUDE_DIR` Points to the unified
[OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers).
* `SPIRV_INCLUDE_DIR` Points to the unified
[SPIRV-Headers](https://github.com/KhronosGroup/SPIRV-Headers).
* `CL_LIB_DIR` Directory containing the OpenCL library to build against.
* `SPIRV_TOOLS_DIR` Directory containing the `spirv-as` and `spirv-val` binaries
to be used in the CTS build process. Alternatively, the location to these binaries
@@ -31,6 +33,7 @@ a build, and compile.
```sh
git clone https://github.com/KhronosGroup/OpenCL-CTS.git
git clone https://github.com/KhronosGroup/OpenCL-Headers.git
git clone https://github.com/KhronosGroup/SPIRV-Headers.git
git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
git clone https://github.com/KhronosGroup/SPIRV-Tools.git
git clone https://github.com/KhronosGroup/SPIRV-Headers.git SPIRV-Tools/external/spirv-headers
@@ -50,6 +53,7 @@ cmake --build SPIRV-Tools/build --config Release
mkdir OpenCL-CTS/build
cmake -S OpenCL-CTS -B OpenCL-CTS/build \
-DCL_INCLUDE_DIR=$PWD/OpenCL-Headers \
-DSPIRV_INCLUDE_DIR=$PWD/SPIRV-Headers \
-DCL_LIB_DIR=$PWD/OpenCL-ICD-Loader/build \
-DSPIRV_TOOLS_DIR=$PWD/SPIRV-Tools/build/tools/ \
-DOPENCL_LIBRARIES=OpenCL

View File

@@ -436,7 +436,14 @@ void *ThreadPool_WorkerFunc(void *p)
// drop run count to 0
gRunCount = 0;
#if defined(_M_IX86) || defined(_M_X64)
_mm_mfence();
#elif defined(_M_ARM64)
__dmb(_ARM64_BARRIER_ISHST);
#else
#error Architecture needs an implementation
#endif
#else
if (pthread_mutex_lock(&gAtomicLock))
log_error(
@@ -703,7 +710,13 @@ void ThreadPool_Exit(void)
// http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
__sync_synchronize();
#elif defined(_MSC_VER)
#if defined(_M_IX86) || defined(_M_X64)
_mm_mfence();
#elif defined(_M_ARM64)
__dmb(_ARM64_BARRIER_ISHST);
#else
#error Architecture needs an implementation
#endif
#else
#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
#endif

View File

@@ -23,10 +23,10 @@
#include <CL/cl_half.h>
#if defined(__SSE__) || defined(_MSC_VER)
#if defined(__SSE__) || _M_IX86_FP == 1
#include <xmmintrin.h>
#endif
#if defined(__SSE2__) || defined(_MSC_VER)
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
#include <emmintrin.h>
#endif
@@ -110,7 +110,7 @@ static long lrintf_clamped(float f)
volatile float x = f;
float magicVal = magic[f < 0];
#if defined(__SSE__) || defined(_WIN32)
#if defined(__SSE__) || _M_IX86_FP == 1
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
// incorrectly
__m128 v = _mm_set_ss(x);
@@ -150,7 +150,7 @@ static long lrint_clamped(double f)
{
volatile double x = f;
double magicVal = magic[f < 0];
#if defined(__SSE2__) || (defined(_MSC_VER))
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
// incorrectly
__m128d v = _mm_set_sd(x);

View File

@@ -387,8 +387,7 @@ static float Ulp_Error_Half_Float(float test, double reference)
}
// reference is a normal power of two or a zero
int ulp_exp =
HALF_MANT_DIG - 1 - std::max(ilogb(reference) - 1, HALF_MIN_EXP - 1);
int ulp_exp = HALF_MANT_DIG - std::max(ilogb(reference), HALF_MIN_EXP);
// Scale the exponent of the error
return (float)scalbn(testVal - reference, ulp_exp);
@@ -469,8 +468,7 @@ float Ulp_Error(float test, double reference)
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
int ulp_exp =
FLT_MANT_DIG - 1 - std::max(ilogb(reference) - 1, FLT_MIN_EXP - 1);
int ulp_exp = FLT_MANT_DIG - std::max(ilogb(reference), FLT_MIN_EXP);
// Scale the exponent of the error
return (float)scalbn(testVal - reference, ulp_exp);
@@ -553,8 +551,7 @@ float Ulp_Error_Double(double test, long double reference)
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
int ulp_exp =
DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
int ulp_exp = DBL_MANT_DIG - std::max(ilogbl(reference), DBL_MIN_EXP);
// Scale the exponent of the error
float result = (float)scalbnl(testVal - reference, ulp_exp);

View File

@@ -37,36 +37,44 @@ typedef int FPU_mode_type;
#else
typedef int64_t FPU_mode_type;
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|| defined(_M_X64) || defined(__MINGW32__)
#include <xmmintrin.h>
#elif defined(_M_ARM64)
#include <intrin.h>
#elif defined(__PPC__)
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#elif defined(__mips__)
#include "mips/m32c1.h"
#endif
// Set the reference hardware floating point unit to FTZ mode
inline void ForceFTZ(FPU_mode_type *mode)
inline void ForceFTZ(FPU_mode_type *oldMode)
{
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
*mode = _mm_getcsr();
_mm_setcsr(*mode | 0x8040);
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|| defined(_M_X64) || defined(__MINGW32__)
*oldMode = _mm_getcsr();
_mm_setcsr(*oldMode | 0x8040);
#elif defined(__PPC__)
*mode = fpu_control;
*oldMode = fpu_control;
fpu_control |= _FPU_MASK_NI;
#elif defined(__arm__)
unsigned fpscr;
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
*oldMode = fpscr;
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
// Add 64 bit support
#elif defined(__aarch64__)
#elif defined(__aarch64__) // Clang
uint64_t fpscr;
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
*oldMode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
#elif defined(_M_ARM64) // Visual Studio
uint64_t fpscr;
fpscr = _ReadStatusReg(ARM64_FPSR);
*oldMode = fpscr;
_WriteStatusReg(ARM64_FPCR, fpscr | (1U << 24));
#elif defined(__mips__)
fpa_bissr(FPA_CSR_FS);
#else
@@ -75,26 +83,31 @@ inline void ForceFTZ(FPU_mode_type *mode)
}
// Disable the denorm flush to zero
inline void DisableFTZ(FPU_mode_type *mode)
inline void DisableFTZ(FPU_mode_type *oldMode)
{
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
*mode = _mm_getcsr();
_mm_setcsr(*mode & ~0x8040);
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|| defined(_M_X64) || defined(__MINGW32__)
*oldMode = _mm_getcsr();
_mm_setcsr(*oldMode & ~0x8040);
#elif defined(__PPC__)
*mode = fpu_control;
fpu_control &= ~_FPU_MASK_NI;
#elif defined(__arm__)
unsigned fpscr;
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
*oldMode = fpscr;
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
// Add 64 bit support
#elif defined(__aarch64__)
#elif defined(__aarch64__) // Clang
uint64_t fpscr;
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
*oldMode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
#elif defined(_M_ARM64) // Visual Studio
uint64_t fpscr;
fpscr = _ReadStatusReg(ARM64_FPSR);
*oldMode = fpscr;
_WriteStatusReg(ARM64_FPCR, fpscr & ~(1U << 24));
#elif defined(__mips__)
fpa_bicsr(FPA_CSR_FS);
#else
@@ -105,16 +118,18 @@ inline void DisableFTZ(FPU_mode_type *mode)
// Restore the reference hardware to floating point state indicated by *mode
inline void RestoreFPState(FPU_mode_type *mode)
{
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|| defined(_M_X64) || defined(__MINGW32__)
_mm_setcsr(*mode);
#elif defined(__PPC__)
fpu_control = *mode;
#elif defined(__arm__)
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
// Add 64 bit support
#elif defined(__aarch64__)
#elif defined(__aarch64__) // Clang
__asm__ volatile("msr fpcr, %0" ::"r"(*mode));
#elif defined(_M_ARM64) // Visual Studio
_WriteStatusReg(ARM64_FPCR, *mode);
#elif defined(__mips__)
// Mips runs by default with DAZ=1 FTZ=1
#else
@@ -125,4 +140,4 @@ inline void RestoreFPState(FPU_mode_type *mode)
#error ForceFTZ and RestoreFPState need implentations
#endif
#endif
#endif

View File

@@ -0,0 +1,35 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _mathHelpers_h
#define _mathHelpers_h
#if defined(__APPLE__)
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl_platform.h>
#endif
#include <cmath>
template <typename T> inline bool isnan_fp(const T &v) { return std::isnan(v); }
template <> inline bool isnan_fp<cl_half>(const cl_half &v)
{
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = ((cl_half)v) & 0x3FF;
return (h_exp == 0x1F && h_mant != 0);
}
#endif // _mathHelpers_h

View File

@@ -786,7 +786,9 @@ int __builtin_clz(unsigned int pattern)
#endif // !__has_builtin(__builtin_clz)
#include <intrin.h>
#if !defined(_M_ARM64)
#include <emmintrin.h>
#endif
int usleep(int usec)
{

View File

@@ -35,6 +35,8 @@ std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
bool gDisableSPIRVValidation = false;
std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR;
unsigned gNumWorkerThreads;
bool gListTests = false;
bool gWimpyMode = false;
void helpInfo()
{
@@ -49,6 +51,12 @@ void helpInfo()
spir-v Use SPIR-V offline compilation
--num-worker-threads <num>
Select parallel execution with the specified number of worker threads.
--list
List sub-tests
-w, --wimpy
Enable wimpy mode. It does not impact all tests. Impacted tests will run
with a very small subset of the tests. This option should not be used
for conformance submission (default: disabled).
For offline compilation (binary and spir-v modes) only:
--compilation-cache-mode <cache-mode>
@@ -104,6 +112,16 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
// option and print its own help.
helpInfo();
}
else if (!strcmp(argv[i], "--list") || !strcmp(argv[i], "-list"))
{
delArg++;
gListTests = true;
}
else if (!strcmp(argv[i], "--wimpy") || !strcmp(argv[i], "-w"))
{
delArg++;
gWimpyMode = true;
}
else if (!strcmp(argv[i], "--compilation-mode"))
{
delArg++;

View File

@@ -40,6 +40,8 @@ extern std::string gCompilationCachePath;
extern std::string gCompilationProgram;
extern bool gDisableSPIRVValidation;
extern std::string gSPIRVValidator;
extern bool gListTests;
extern bool gWimpyMode;
extern int parseCustomParam(int argc, const char *argv[],
const char *ignore = 0);

View File

@@ -193,7 +193,8 @@ RoundingMode get_round(void)
// basic_test_conversions.c in which case, these function are at
// liberty to do nothing.
//
#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|| defined(_M_X64)
#include <xmmintrin.h>
#elif defined(__PPC__)
#include <fpu_control.h>
@@ -203,18 +204,24 @@ RoundingMode get_round(void)
void *FlushToZero(void)
{
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|| defined(_M_X64)
union {
unsigned int i;
void *p;
} u = { _mm_getcsr() };
_mm_setcsr(u.i | 0x8040);
return u.p;
#elif defined(__arm__) || defined(__aarch64__)
#elif defined(__arm__) || defined(__aarch64__) // Clang
int64_t fpscr;
_FPU_GETCW(fpscr);
_FPU_SETCW(fpscr | FPSCR_FZ);
return NULL;
#elif defined(_M_ARM64) // Visual Studio
uint64_t fpscr;
fpscr = _ReadStatusReg(ARM64_FPSR);
_WriteStatusReg(ARM64_FPCR, fpscr | (1U << 24));
return NULL;
#elif defined(__PPC__)
fpu_control_t flags = 0;
_FPU_GETCW(flags);
@@ -237,16 +244,21 @@ void *FlushToZero(void)
void UnFlushToZero(void *p)
{
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|| defined(_M_X64)
union {
void *p;
unsigned int i;
} u = { p };
_mm_setcsr(u.i);
#elif defined(__arm__) || defined(__aarch64__)
#elif defined(__arm__) || defined(__aarch64__) // Clang
int64_t fpscr;
_FPU_GETCW(fpscr);
_FPU_SETCW(fpscr & ~FPSCR_FZ);
#elif defined(_M_ARM64) // Visual Studio
uint64_t fpscr;
fpscr = _ReadStatusReg(ARM64_FPSR);
_WriteStatusReg(ARM64_FPCR, fpscr & ~(1U << 24));
#elif defined(__PPC__)
fpu_control_t flags = 0;
_FPU_GETCW(flags);

View File

@@ -22,6 +22,7 @@
#include <cassert>
#include <deque>
#include <mutex>
#include <set>
#include <stdexcept>
#include <thread>
#include <vector>
@@ -169,6 +170,19 @@ void version_expected_info(const char *test_name, const char *api_name,
"reports %s version %s)\n",
test_name, api_name, expected_version, api_name, device_version);
}
static void list_tests(int testNum, test_definition testList[])
{
std::set<std::string> names;
for (int i = 0; i < testNum; i++)
{
names.insert(testList[i].name);
}
for (const auto &name : names)
{
log_info("\t%s\n", name.c_str());
}
}
int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
test_definition testList[],
int forceNoContextCreation,
@@ -197,8 +211,11 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
if (env_mode != NULL)
{
based_on_env_var = 1;
if (strcmp(env_mode, "gpu") == 0
|| strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0)
if (strcmp(env_mode, "all") == 0
|| strcmp(env_mode, "CL_DEVICE_TYPE_ALL") == 0)
device_type = CL_DEVICE_TYPE_ALL;
else if (strcmp(env_mode, "gpu") == 0
|| strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0)
device_type = CL_DEVICE_TYPE_GPU;
else if (strcmp(env_mode, "cpu") == 0
|| strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0)
@@ -255,10 +272,23 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
return EXIT_FAILURE;
}
/* Special case: just list the tests */
if ((argc > 1)
&& (!strcmp(argv[1], "-list") || !strcmp(argv[1], "-h")
|| !strcmp(argv[1], "--help")))
if (gListTests)
{
list_tests(testNum, testList);
return EXIT_SUCCESS;
}
gWimpyMode |= (getenv("CL_WIMPY_MODE") != nullptr);
if (gWimpyMode)
{
log_info("\n");
log_info("**************************\n");
log_info("*** Wimpy mode enabled ***\n");
log_info("**************************\n");
log_info("\n");
}
if ((argc > 1) && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")))
{
char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
@@ -271,7 +301,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
"(default 0).\n");
log_info("\tid<num>\t\tIndicates device at index <num> should be used "
"(default 0).\n");
log_info("\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> "
log_info("\t<device_type>\tall|cpu|gpu|accelerator|<CL_DEVICE_TYPE_*> "
"(default CL_DEVICE_TYPE_DEFAULT)\n");
log_info("\n");
log_info("\tNOTE: You may pass environment variable "
@@ -281,10 +311,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
log_info("\n");
log_info("Test names:\n");
for (int i = 0; i < testNum; i++)
{
log_info("\t%s\n", testList[i].name);
}
list_tests(testNum, testList);
return EXIT_SUCCESS;
}
@@ -320,8 +347,14 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
/* Do we have a CPU/GPU specification? */
if (argc > 1)
{
if (strcmp(argv[argc - 1], "gpu") == 0
|| strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0)
if (strcmp(argv[argc - 1], "all") == 0
|| strcmp(argv[argc - 1], "CL_DEVICE_TYPE_ALL") == 0)
{
device_type = CL_DEVICE_TYPE_ALL;
argc--;
}
else if (strcmp(argv[argc - 1], "gpu") == 0
|| strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0)
{
device_type = CL_DEVICE_TYPE_GPU;
argc--;
@@ -376,6 +409,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
switch (device_type)
{
case CL_DEVICE_TYPE_ALL: log_info("Requesting any device "); break;
case CL_DEVICE_TYPE_GPU: log_info("Requesting GPU device "); break;
case CL_DEVICE_TYPE_CPU: log_info("Requesting CPU device "); break;
case CL_DEVICE_TYPE_ACCELERATOR:

View File

@@ -28,8 +28,8 @@ public:
Version(): m_major(0), m_minor(0) {}
Version(cl_uint major, cl_uint minor): m_major(major), m_minor(minor) {}
int major() const { return m_major; }
int minor() const { return m_minor; }
int get_major() const { return m_major; }
int get_minor() const { return m_minor; }
bool operator>(const Version &rhs) const
{
return to_uint() > rhs.to_uint();

View File

@@ -1,5 +1,7 @@
set(MODULE_NAME API)
find_package(Python3 COMPONENTS Interpreter QUIET)
set(${MODULE_NAME}_SOURCES
main.cpp
negative_platform.cpp
@@ -40,6 +42,20 @@ set(${MODULE_NAME}_SOURCES
test_pipe_properties_queries.cpp
test_wg_suggested_local_work_size.cpp
test_device_command_queue.cpp
test_spirv_queries.cpp
${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def
)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def
COMMENT "Generating spirv_capability_deps.def..."
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_spirv_capability_deps.py
--grammar "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json"
--output "${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def"
DEPENDS generate_spirv_capability_deps.py "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json"
USES_TERMINAL
VERBATIM)
include(../CMakeCommon.txt)
target_include_directories(${${MODULE_NAME}_OUT} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

View File

@@ -0,0 +1,102 @@
#!/usr/bin/env python3
#####################################################################
# Copyright (c) 2025 The Khronos Group Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#####################################################################
"""
Generates a file describing the SPIR-V extension dependencies or SPIR-V version
dependencies for a SPIR-V capability. This can be used to ensure that if support
for a SPIR-V capability is reported, the necessary SPIR-V extensions or SPIR-V
version is also supported.
"""
import argparse
import json
header_text = """\
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file is generated from the SPIR-V JSON grammar file.
// Please do not edit it directly!
"""
def main():
parser = argparse.ArgumentParser(description='Generate SPIR-V extension and version dependencies for SPIR-V capabilities')
parser.add_argument('--grammar', metavar='<path>',
type=str, required=True,
help='input JSON grammar file')
parser.add_argument('--output', metavar='<path>',
type=str, required=False,
help='output file path (default: stdout)')
args = parser.parse_args()
dependencies = {}
capabilities = []
with open(args.grammar) as json_file:
grammar_json = json.loads(json_file.read())
for operand_kind in grammar_json['operand_kinds']:
if operand_kind['kind'] == 'Capability':
for cap in operand_kind['enumerants']:
capname = cap['enumerant']
capabilities.append(capname)
dependencies[capname] = {}
dependencies[capname]['extensions'] = cap['extensions'] if 'extensions' in cap else []
dependencies[capname]['version'] = ("SPIR-V_" + cap['version']) if 'version' in cap and cap['version'] != 'None' else ""
capabilities.sort()
output = []
output.append(header_text)
output.append("// clang-format off")
if False:
for cap in capabilities:
deps = dependencies[cap]
extensions_str = ', '.join(f'"{ext}"' for ext in deps['extensions'])
output.append('SPIRV_CAPABILITY_DEPENDENCIES( {}, {{{}}}, "{}" )'.format(
cap, extensions_str, deps['version']))
else:
for cap in capabilities:
deps = dependencies[cap]
if deps['version'] != "":
output.append('SPIRV_CAPABILITY_VERSION_DEPENDENCY( {}, "{}" )'.format(cap, deps['version']))
for ext in deps['extensions']:
output.append('SPIRV_CAPABILITY_EXTENSION_DEPENDENCY( {}, "{}" )'.format(cap, ext))
output.append("// clang-format on")
if args.output:
with open(args.output, 'w') as output_file:
output_file.write('\n'.join(output))
else:
print('\n'.join(output))
if __name__ == '__main__':
main()

View File

@@ -1088,10 +1088,12 @@ REGISTER_TEST(min_max_image_buffer_size)
pixelBytes = maxAllocSize / maxDimensionPixels;
if (pixelBytes == 0)
{
log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than "
"CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image "
"of maximum size!\n");
return -1;
log_info(
"Note, the value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is %zu pixels, "
"therefore the size of the allocated image may be larger than the "
"scaled CL_DEVICE_MAX_MEM_ALLOC_SIZE of %" PRIu64 " bytes.\n",
maxDimensionPixels, maxAllocSize);
pixelBytes = 1;
}
error = -1;

View File

@@ -87,6 +87,35 @@ const char *sample_two_kernel_program[] = {
"\n"
"}\n" };
const char *sample_mem_obj_size_test_kernel = R"(
__kernel void mem_obj_size_test(__global int *src, __global int *dst)
{
size_t tid = get_global_id(0);
dst[tid] = src[tid];
}
)";
const char *sample_local_size_test_kernel = R"(
__kernel void local_size_test(__local int *src, __global int *dst)
{
size_t tid = get_global_id(0);
dst[tid] = src[tid];
}
)";
const char *sample_read_only_image_test_kernel = R"(
__kernel void read_only_image_test(__write_only image2d_t img, __global uint4 *src)
{
write_imageui(img, (int2)(get_global_id(0), get_global_id(1)), src[0]);
}
)";
const char *sample_write_only_image_test_kernel = R"(
__kernel void write_only_image_test(__read_only image2d_t src, __global uint4 *dst)
{
dst[0]=read_imageui(src, (int2)(get_global_id(0), get_global_id(1)));
}
)";
REGISTER_TEST(get_kernel_info)
{
@@ -669,7 +698,7 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
test_error(error,
"Unable to get sample_image_test kernel for built program");
std::vector<cl_uchar> mem_data(size_dim * size_dim);
std::vector<cl_uchar> mem_data(size_dim * size_dim * 4);
buffer = clCreateBuffer(context, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
sizeof(cl_int) * size_dim, mem_data.data(), &error);
test_error(error, "clCreateBuffer failed");
@@ -704,3 +733,169 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_mem_obj)
{
cl_int error = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper mem_obj_arg_kernel;
// Setup the test
error =
create_single_kernel_helper(context, &program, nullptr, 1,
&sample_mem_obj_size_test_kernel, nullptr);
test_error(error, "Unable to build test program");
mem_obj_arg_kernel = clCreateKernel(program, "mem_obj_size_test", &error);
test_error(error,
"Unable to get mem_obj_size_test kernel for built program");
std::vector<cl_uchar> mem_data(256, 0);
clMemWrapper buffer = clCreateBuffer(
context, CL_MEM_USE_HOST_PTR, mem_data.size(), mem_data.data(), &error);
test_error(error, "clCreateBuffer failed");
// Run the test - CL_INVALID_ARG_SIZE
error = clSetKernelArg(mem_obj_arg_kernel, 0, sizeof(cl_mem) * 2, &buffer);
test_failure_error_ret(
error, CL_INVALID_ARG_SIZE,
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
"argument is a memory object and arg_size > sizeof(cl_mem)",
TEST_FAIL);
error = clSetKernelArg(mem_obj_arg_kernel, 0, sizeof(cl_mem) / 2, &buffer);
test_failure_error_ret(
error, CL_INVALID_ARG_SIZE,
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
"argument is a memory object and arg_size < sizeof(cl_mem)",
TEST_FAIL);
return TEST_PASS;
}
REGISTER_TEST(negative_invalid_kernel)
{
cl_int error = CL_SUCCESS;
clKernelWrapper kernel;
clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(cl_float), NULL, &error);
test_error(error, "clCreateBuffer failed");
// Run the test - CL_INVALID_KERNEL
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem);
test_failure_error_ret(
error, CL_INVALID_KERNEL,
"clSetKernelArg is supposed to fail with CL_INVALID_KERNEL when kernel "
"is not a valid kernel object",
TEST_FAIL);
return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_index)
{
cl_int error = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper kernel;
// Setup the test
error = create_single_kernel_helper(context, &program, nullptr, 1,
sample_single_test_kernel, nullptr);
test_error(error, "Unable to build test program");
kernel = clCreateKernel(program, "sample_test", &error);
test_error(error, "Unable to get sample_test kernel for built program");
// Run the test - 2 index is out or range - expected CL_INVALID_ARG_INDEX
error = clSetKernelArg(kernel, 2, sizeof(cl_mem), nullptr);
test_failure_error_ret(
error, CL_INVALID_ARG_INDEX,
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_INDEX when "
"arg_index is not a valid argument index",
TEST_FAIL);
return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_size_local)
{
cl_int error = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper local_arg_kernel;
// Setup the test
error = create_single_kernel_helper(
context, &program, nullptr, 1, &sample_local_size_test_kernel, nullptr);
test_error(error, "Unable to build test program");
local_arg_kernel = clCreateKernel(program, "local_size_test", &error);
test_error(error, "Unable to get local_size_test kernel for built program");
// Run the test
error = clSetKernelArg(local_arg_kernel, 0, 0, nullptr);
test_failure_error_ret(
error, CL_INVALID_ARG_SIZE,
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when 0 is "
"passed to a local qualifier kernel argument",
TEST_FAIL);
return TEST_PASS;
}
REGISTER_TEST(negative_set_read_write_image_arg)
{
cl_int error = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper write_image_kernel, read_image_kernel;
clMemWrapper write_only_image, read_only_image;
const char *test_kernels[2] = { sample_read_only_image_test_kernel,
sample_write_only_image_test_kernel };
constexpr cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 };
const int size_dim = 128;
PASSIVE_REQUIRE_IMAGE_SUPPORT(device);
// Setup the test
error = create_single_kernel_helper(context, &program, nullptr, 2,
test_kernels, nullptr);
test_error(error, "Unable to build test program");
read_image_kernel = clCreateKernel(program, "read_only_image_test", &error);
test_error(error,
"Unable to get read_only_image_test kernel for built program");
write_image_kernel =
clCreateKernel(program, "write_only_image_test", &error);
test_error(error,
"Unable to get write_only_image_test kernel for built program");
read_only_image = create_image_2d(context, CL_MEM_READ_ONLY, &format,
size_dim, size_dim, 0, nullptr, &error);
test_error(error, "create_image_2d failed");
write_only_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &format,
size_dim, size_dim, 0, nullptr, &error);
test_error(error, "create_image_2d failed");
// Run the test
error = clSetKernelArg(read_image_kernel, 0, sizeof(read_only_image),
&read_only_image);
test_failure_error_ret(error, CL_INVALID_ARG_VALUE,
"clSetKernelArg is supposed to fail "
"with CL_INVALID_ARG_VALUE when an image is "
"created with CL_MEM_READ_ONLY is "
"passed to a write_only kernel argument",
TEST_FAIL);
error = clSetKernelArg(write_image_kernel, 0, sizeof(write_only_image),
&write_only_image);
test_failure_error_ret(error, CL_INVALID_ARG_VALUE,
"clSetKernelArg is supposed to fail "
"with CL_INVALID_ARG_VALUE when an image is "
"created with CL_MEM_WRITE_ONLY is "
"passed to a read_only kernel argument",
TEST_FAIL);
return TEST_PASS;
}

View File

@@ -0,0 +1,767 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include <algorithm>
#include <map>
#include <vector>
#define SPV_ENABLE_UTILITY_CODE
#include <spirv/unified1/spirv.hpp>
static bool is_spirv_version_supported(cl_device_id deviceID,
const std::string& version)
{
std::string ilVersions = get_device_il_version_string(deviceID);
return ilVersions.find(version) != std::string::npos;
}
static int doQueries(cl_device_id device,
std::vector<const char*>& extendedInstructionSets,
std::vector<const char*>& extensions,
std::vector<cl_uint>& capabilities)
{
cl_int error = CL_SUCCESS;
size_t size = 0;
error =
clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR,
0, nullptr, &size);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR size\n");
extendedInstructionSets.resize(size / sizeof(const char*));
error =
clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR,
size, extendedInstructionSets.data(), nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR\n");
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, 0, nullptr,
&size);
test_error(
error,
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR size\n");
extensions.resize(size / sizeof(const char*));
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, size,
extensions.data(), nullptr);
test_error(error,
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR\n");
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, 0,
nullptr, &size);
test_error(
error,
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR size\n");
capabilities.resize(size / sizeof(cl_uint));
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, size,
capabilities.data(), nullptr);
test_error(error,
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR\n");
return CL_SUCCESS;
}
static int findRequirements(cl_device_id device,
std::vector<const char*>& extendedInstructionSets,
std::vector<const char*>& extensions,
std::vector<cl_uint>& capabilities)
{
cl_int error = CL_SUCCESS;
auto version = get_device_cl_version(device);
auto ilVersions = get_device_il_version_string(device);
// If no SPIR-V versions are supported, there are no requirements.
if (ilVersions.find("SPIR-V") == std::string::npos)
{
return CL_SUCCESS;
}
cl_bool deviceImageSupport = CL_FALSE;
cl_bool deviceReadWriteImageSupport = CL_FALSE;
cl_bool deviceSubGroupsSupport = CL_FALSE;
cl_bool deviceGenericAddressSpaceSupport = CL_FALSE;
cl_bool deviceWorkGroupCollectiveFunctionsSupport = CL_FALSE;
cl_bool devicePipeSupport = CL_FALSE;
cl_bool deviceDeviceEnqueueSupport = CL_FALSE;
cl_device_integer_dot_product_capabilities_khr
deviceIntegerDotProductCapabilities = 0;
cl_device_fp_atomic_capabilities_ext deviceFp32AtomicCapabilities = 0;
cl_device_fp_atomic_capabilities_ext deviceFp16AtomicCapabilities = 0;
cl_device_fp_atomic_capabilities_ext deviceFp64AtomicCapabilities = 0;
error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
sizeof(deviceImageSupport), &deviceImageSupport,
nullptr);
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE_SUPPORT\n");
if (version >= Version(2, 0))
{
cl_uint deviceMaxReadWriteImageArgs = 0;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
sizeof(deviceMaxReadWriteImageArgs),
&deviceMaxReadWriteImageArgs, nullptr);
test_error(
error,
"clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
deviceReadWriteImageSupport =
deviceMaxReadWriteImageArgs != 0 ? CL_TRUE : CL_FALSE;
}
if (version >= Version(2, 1))
{
cl_uint deviceMaxNumSubGroups = 0;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS,
sizeof(deviceMaxNumSubGroups),
&deviceMaxNumSubGroups, nullptr);
test_error(error,
"clGetDeviceInfo failed for CL_DEVICE_MAX_NUM_SUB_GROUPS\n");
deviceSubGroupsSupport =
deviceMaxNumSubGroups != 0 ? CL_TRUE : CL_FALSE;
}
else if (is_extension_available(device, "cl_khr_subgroups"))
{
deviceSubGroupsSupport = CL_TRUE;
}
if (version >= Version(3, 0))
{
error = clGetDeviceInfo(device, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT,
sizeof(deviceGenericAddressSpaceSupport),
&deviceGenericAddressSpaceSupport, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT\n");
error = clGetDeviceInfo(
device, CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT,
sizeof(deviceWorkGroupCollectiveFunctionsSupport),
&deviceWorkGroupCollectiveFunctionsSupport, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT\n");
error = clGetDeviceInfo(device, CL_DEVICE_PIPE_SUPPORT,
sizeof(devicePipeSupport), &devicePipeSupport,
nullptr);
test_error(error,
"clGetDeviceInfo failed for CL_DEVICE_PIPE_SUPPORT\n");
cl_device_device_enqueue_capabilities deviceDeviceEnqueueCapabilities =
0;
error = clGetDeviceInfo(device, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
sizeof(deviceDeviceEnqueueCapabilities),
&deviceDeviceEnqueueCapabilities, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES\n");
deviceDeviceEnqueueSupport =
deviceDeviceEnqueueCapabilities != 0 ? CL_TRUE : CL_FALSE;
}
else if (version >= Version(2, 0))
{
deviceGenericAddressSpaceSupport = CL_TRUE;
deviceWorkGroupCollectiveFunctionsSupport = CL_TRUE;
devicePipeSupport = CL_TRUE;
deviceDeviceEnqueueSupport = CL_TRUE;
}
if (is_extension_available(device, "cl_khr_integer_dot_product"))
{
error = clGetDeviceInfo(device,
CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR,
sizeof(deviceIntegerDotProductCapabilities),
&deviceIntegerDotProductCapabilities, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR\n");
}
if (is_extension_available(device, "cl_ext_float_atomics"))
{
error =
clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(deviceFp32AtomicCapabilities),
&deviceFp32AtomicCapabilities, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT\n");
error =
clGetDeviceInfo(device, CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(deviceFp16AtomicCapabilities),
&deviceFp16AtomicCapabilities, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT\n");
error =
clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(deviceFp64AtomicCapabilities),
&deviceFp64AtomicCapabilities, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT\n");
}
// Required.
extendedInstructionSets.push_back("OpenCL.std");
capabilities.push_back(spv::CapabilityAddresses);
capabilities.push_back(spv::CapabilityFloat16Buffer);
capabilities.push_back(spv::CapabilityInt16);
capabilities.push_back(spv::CapabilityInt8);
capabilities.push_back(spv::CapabilityKernel);
capabilities.push_back(spv::CapabilityLinkage);
capabilities.push_back(spv::CapabilityVector16);
// Required for FULL_PROFILE devices, or devices supporting
// cles_khr_int64.
if (gHasLong)
{
capabilities.push_back(spv::CapabilityInt64);
}
// Required for devices supporting images.
if (deviceImageSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityImage1D);
capabilities.push_back(spv::CapabilityImageBasic);
capabilities.push_back(spv::CapabilityImageBuffer);
capabilities.push_back(spv::CapabilityLiteralSampler);
capabilities.push_back(spv::CapabilitySampled1D);
capabilities.push_back(spv::CapabilitySampledBuffer);
}
// Required for devices supporting SPIR-V 1.6.
if (ilVersions.find("SPIR-V_1.6") != std::string::npos)
{
capabilities.push_back(spv::CapabilityUniformDecoration);
}
// Required for devices supporting read-write images.
if (deviceReadWriteImageSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityImageReadWrite);
}
// Required for devices supporting the generic address space.
if (deviceGenericAddressSpaceSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityGenericPointer);
}
// Required for devices supporting sub-groups or work-group collective
// functions.
if (deviceSubGroupsSupport == CL_TRUE
|| deviceWorkGroupCollectiveFunctionsSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityGroups);
}
// Required for devices supporting pipes.
if (devicePipeSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityPipes);
}
// Required for devices supporting device-side enqueue.
if (deviceDeviceEnqueueSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityDeviceEnqueue);
}
// Required for devices supporting SPIR-V 1.1 and OpenCL 2.2.
if (ilVersions.find("SPIR-V_1.1") != std::string::npos
&& version == Version(2, 2))
{
capabilities.push_back(spv::CapabilityPipeStorage);
}
// Required for devices supporting SPIR-V 1.1 and either OpenCL 2.2 or
// OpenCL 3.0 devices supporting sub-groups.
if (ilVersions.find("SPIR-V_1.1") != std::string::npos
&& (version == Version(2, 2)
|| (version >= Version(3, 0) && deviceSubGroupsSupport == CL_TRUE)))
{
capabilities.push_back(spv::CapabilitySubgroupDispatch);
}
// Required for devices supporting cl_khr_expect_assume.
if (is_extension_available(device, "cl_khr_expect_assume"))
{
extensions.push_back("SPV_KHR_expect_assume");
capabilities.push_back(spv::CapabilityExpectAssumeKHR);
}
// Required for devices supporting cl_khr_extended_bit_ops.
if (is_extension_available(device, "cl_khr_extended_bit_ops"))
{
extensions.push_back("SPV_KHR_bit_instructions");
capabilities.push_back(spv::CapabilityBitInstructions);
}
// Required for devices supporting half-precision floating-point
// (cl_khr_fp16).
if (is_extension_available(device, "cl_khr_fp16"))
{
capabilities.push_back(spv::CapabilityFloat16);
}
// Required for devices supporting double-precision floating-point
// (cl_khr_fp64).
if (is_extension_available(device, "cl_khr_fp64"))
{
capabilities.push_back(spv::CapabilityFloat64);
}
// Required for devices supporting 64-bit atomics
// (cl_khr_int64_base_atomics or cl_khr_int64_extended_atomics).
if (is_extension_available(device, "cl_khr_int64_base_atomics")
|| is_extension_available(device, "cl_khr_int64_extended_atomics"))
{
capabilities.push_back(spv::CapabilityInt64Atomics);
}
// Required for devices supporting cl_khr_integer_dot_product.
if (is_extension_available(device, "cl_khr_integer_dot_product"))
{
extensions.push_back("SPV_KHR_integer_dot_product");
capabilities.push_back(spv::CapabilityDotProduct);
capabilities.push_back(spv::CapabilityDotProductInput4x8BitPacked);
}
// Required for devices supporting cl_khr_integer_dot_product and
// CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR.
if (is_extension_available(device, "cl_khr_integer_dot_product")
&& (deviceIntegerDotProductCapabilities
& CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR))
{
capabilities.push_back(spv::CapabilityDotProductInput4x8Bit);
}
// Required for devices supporting cl_khr_kernel_clock.
if (is_extension_available(device, "cl_khr_kernel_clock"))
{
extensions.push_back("SPV_KHR_shader_clock");
capabilities.push_back(spv::CapabilityShaderClockKHR);
}
// Required for devices supporting both cl_khr_mipmap_image and
// cl_khr_mipmap_image_writes.
if (is_extension_available(device, "cl_khr_mipmap_image")
&& is_extension_available(device, "cl_khr_mipmap_image_writes"))
{
capabilities.push_back(spv::CapabilityImageMipmap);
}
// Required for devices supporting cl_khr_spirv_extended_debug_info.
if (is_extension_available(device, "cl_khr_spirv_extended_debug_info"))
{
extendedInstructionSets.push_back("OpenCL.DebugInfo.100");
}
// Required for devices supporting cl_khr_spirv_linkonce_odr.
if (is_extension_available(device, "cl_khr_spirv_linkonce_odr"))
{
extensions.push_back("SPV_KHR_linkonce_odr");
}
// Required for devices supporting
// cl_khr_spirv_no_integer_wrap_decoration.
if (is_extension_available(device,
"cl_khr_spirv_no_integer_wrap_decoration"))
{
extensions.push_back("SPV_KHR_no_integer_wrap_decoration");
}
// Required for devices supporting cl_khr_subgroup_ballot.
if (is_extension_available(device, "cl_khr_subgroup_ballot"))
{
capabilities.push_back(spv::CapabilityGroupNonUniformBallot);
}
// Required for devices supporting cl_khr_subgroup_clustered_reduce.
if (is_extension_available(device, "cl_khr_subgroup_clustered_reduce"))
{
capabilities.push_back(spv::CapabilityGroupNonUniformClustered);
}
// Required for devices supporting cl_khr_subgroup_named_barrier.
if (is_extension_available(device, "cl_khr_subgroup_named_barrier"))
{
capabilities.push_back(spv::CapabilityNamedBarrier);
}
// Required for devices supporting
// cl_khr_subgroup_non_uniform_arithmetic.
if (is_extension_available(device,
"cl_khr_subgroup_non_uniform_arithmetic"))
{
capabilities.push_back(spv::CapabilityGroupNonUniformArithmetic);
}
// Required for devices supporting cl_khr_subgroup_non_uniform_vote.
if (is_extension_available(device, "cl_khr_subgroup_non_uniform_vote"))
{
capabilities.push_back(spv::CapabilityGroupNonUniform);
capabilities.push_back(spv::CapabilityGroupNonUniformVote);
}
// Required for devices supporting cl_khr_subgroup_rotate.
if (is_extension_available(device, "cl_khr_subgroup_rotate"))
{
extensions.push_back("SPV_KHR_subgroup_rotate");
capabilities.push_back(spv::CapabilityGroupNonUniformRotateKHR);
}
// Required for devices supporting cl_khr_subgroup_shuffle.
if (is_extension_available(device, "cl_khr_subgroup_shuffle"))
{
capabilities.push_back(spv::CapabilityGroupNonUniformShuffle);
}
// Required for devices supporting cl_khr_subgroup_shuffle_relative.
if (is_extension_available(device, "cl_khr_subgroup_shuffle_relative"))
{
capabilities.push_back(spv::CapabilityGroupNonUniformShuffleRelative);
}
// Required for devices supporting cl_khr_work_group_uniform_arithmetic.
if (is_extension_available(device, "cl_khr_work_group_uniform_arithmetic"))
{
extensions.push_back("SPV_KHR_uniform_group_instructions");
capabilities.push_back(spv::CapabilityGroupUniformArithmeticKHR);
}
// Required for devices supporting cl_ext_float_atomics and fp32 atomic
// adds.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp32AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
{
capabilities.push_back(spv::CapabilityAtomicFloat32AddEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp32 atomic
// min and max.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp32AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
{
capabilities.push_back(spv::CapabilityAtomicFloat32MinMaxEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp16 atomic
// adds.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp16AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
{
extensions.push_back("SPV_EXT_shader_atomic_float16_add");
capabilities.push_back(spv::CapabilityAtomicFloat16AddEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp16 atomic
// min and max.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp16AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
{
capabilities.push_back(spv::CapabilityAtomicFloat16MinMaxEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp64 atomic
// adds.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp64AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
{
capabilities.push_back(spv::CapabilityAtomicFloat64AddEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp64 atomic
// min and max.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp64AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
{
capabilities.push_back(spv::CapabilityAtomicFloat64MinMaxEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp16, fp32,
// or fp64 atomic min or max.
if (is_extension_available(device, "cl_ext_float_atomics")
&& ((deviceFp32AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))
|| (deviceFp16AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))
|| (deviceFp64AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))))
{
extensions.push_back("SPV_EXT_shader_atomic_float_min_max");
}
// Required for devices supporting cl_ext_float_atomics and fp32 or fp64
// atomic adds.
if (is_extension_available(device, "cl_ext_float_atomics")
&& ((deviceFp32AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))
|| (deviceFp64AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))))
{
extensions.push_back("SPV_EXT_shader_atomic_float_add");
}
// Required for devices supporting cl_intel_bfloat16_conversions.
if (is_extension_available(device, "cl_intel_bfloat16_conversions"))
{
extensions.push_back("SPV_INTEL_bfloat16_conversion");
capabilities.push_back(spv::CapabilityBFloat16ConversionINTEL);
}
// Required for devices supporting
// cl_intel_spirv_device_side_avc_motion_estimation.
if (is_extension_available(
device, "cl_intel_spirv_device_side_avc_motion_estimation"))
{
extensions.push_back("SPV_INTEL_device_side_avc_motion_estimation");
capabilities.push_back(
spv::CapabilitySubgroupAvcMotionEstimationChromaINTEL);
capabilities.push_back(spv::CapabilitySubgroupAvcMotionEstimationINTEL);
capabilities.push_back(
spv::CapabilitySubgroupAvcMotionEstimationIntraINTEL);
}
// Required for devices supporting cl_intel_spirv_media_block_io.
if (is_extension_available(device, "cl_intel_spirv_media_block_io"))
{
extensions.push_back("SPV_INTEL_media_block_io");
capabilities.push_back(spv::CapabilitySubgroupImageMediaBlockIOINTEL);
}
// Required for devices supporting cl_intel_spirv_subgroups.
if (is_extension_available(device, "cl_intel_spirv_subgroups"))
{
extensions.push_back("SPV_INTEL_subgroups");
capabilities.push_back(spv::CapabilitySubgroupBufferBlockIOINTEL);
capabilities.push_back(spv::CapabilitySubgroupImageBlockIOINTEL);
capabilities.push_back(spv::CapabilitySubgroupShuffleINTEL);
}
// Required for devices supporting cl_intel_split_work_group_barrier.
if (is_extension_available(device, "cl_intel_split_work_group_barrier"))
{
extensions.push_back("SPV_INTEL_split_barrier");
capabilities.push_back(spv::CapabilitySplitBarrierINTEL);
}
// Required for devices supporting cl_intel_subgroup_buffer_prefetch.
if (is_extension_available(device, "cl_intel_subgroup_buffer_prefetch"))
{
extensions.push_back("SPV_INTEL_subgroup_buffer_prefetch");
capabilities.push_back(spv::CapabilitySubgroupBufferPrefetchINTEL);
}
return CL_SUCCESS;
}
REGISTER_TEST(spirv_query_requirements)
{
if (!is_extension_available(device, "cl_khr_spirv_queries"))
{
log_info("cl_khr_spirv_queries is not supported; skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
cl_int error;
std::vector<const char*> queriedExtendedInstructionSets;
std::vector<const char*> queriedExtensions;
std::vector<cl_uint> queriedCapabilities;
error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions,
queriedCapabilities);
test_error_fail(error, "Unable to perform SPIR-V queries");
std::vector<const char*> requiredExtendedInstructionSets;
std::vector<const char*> requiredExtensions;
std::vector<cl_uint> requiredCapabilities;
error = findRequirements(device, requiredExtendedInstructionSets,
requiredExtensions, requiredCapabilities);
test_error_fail(error, "Unable to find SPIR-V requirements");
for (auto check : requiredExtendedInstructionSets)
{
auto cmp = [=](const char* queried) {
return strcmp(check, queried) == 0;
};
auto it = std::find_if(queriedExtendedInstructionSets.begin(),
queriedExtendedInstructionSets.end(), cmp);
if (it == queriedExtendedInstructionSets.end())
{
test_fail("Missing required extended instruction set: %s\n", check);
}
}
for (auto check : requiredExtensions)
{
auto cmp = [=](const char* queried) {
return strcmp(check, queried) == 0;
};
auto it = std::find_if(queriedExtensions.begin(),
queriedExtensions.end(), cmp);
if (it == queriedExtensions.end())
{
test_fail("Missing required extension: %s\n", check);
}
}
for (auto check : requiredCapabilities)
{
if (std::find(queriedCapabilities.begin(), queriedCapabilities.end(),
check)
== queriedCapabilities.end())
{
test_fail(
"Missing required capability: %s\n",
spv::CapabilityToString(static_cast<spv::Capability>(check)));
}
}
// Find any extraneous capabilities (informational):
for (auto check : queriedCapabilities)
{
if (std::find(requiredCapabilities.begin(), requiredCapabilities.end(),
check)
== requiredCapabilities.end())
{
log_info(
"Found non-required capability: %s\n",
spv::CapabilityToString(static_cast<spv::Capability>(check)));
}
}
return TEST_PASS;
}
REGISTER_TEST(spirv_query_dependencies)
{
if (!is_extension_available(device, "cl_khr_spirv_queries"))
{
log_info("cl_khr_spirv_queries is not supported; skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
cl_int error;
std::vector<const char*> queriedExtendedInstructionSets;
std::vector<const char*> queriedExtensions;
std::vector<cl_uint> queriedCapabilities;
error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions,
queriedCapabilities);
test_error_fail(error, "Unable to perform SPIR-V queries");
struct CapabilityDependencies
{
std::vector<std::string> extensions;
std::string version;
};
std::map<spv::Capability, CapabilityDependencies> dependencies;
#define SPIRV_CAPABILITY_VERSION_DEPENDENCY(_cap, _ver) \
dependencies[spv::Capability##_cap].version = _ver;
#define SPIRV_CAPABILITY_EXTENSION_DEPENDENCY(_cap, _ext) \
dependencies[spv::Capability##_cap].extensions.push_back(_ext);
#include "spirv_capability_deps.def"
// For each queried SPIR-V capability, ensure that either that any SPIR-V
// version dependencies or SPIR-V extension dependencies are satisfied.
for (auto check : queriedCapabilities)
{
// Log and skip any unknown capabilities
auto it = dependencies.find(static_cast<spv::Capability>(check));
if (it == dependencies.end())
{
log_info(
"No known dependencies for queried capability %s!\n",
spv::CapabilityToString(static_cast<spv::Capability>(check)));
continue;
}
// Check if a SPIR-V version dependency is satisfied
const auto& version_dep = it->second.version;
if (!version_dep.empty()
&& is_spirv_version_supported(device, version_dep))
{
continue;
}
// Check if a SPIR-V extension dependency is satisfied
bool found = false;
for (const auto& extension_dep : it->second.extensions)
{
if (std::find(queriedExtensions.begin(), queriedExtensions.end(),
extension_dep)
!= queriedExtensions.end())
{
found = true;
break;
}
}
if (found)
{
continue;
}
// If we get here then the capability has an unsatisfied dependency.
log_error("Couldn't find a dependency for queried capability %s!\n",
spv::CapabilityToString(static_cast<spv::Capability>(check)));
if (!version_dep.empty())
{
log_error("Checked for SPIR-V version %s.\n", version_dep.c_str());
}
for (const auto& extension_dep : it->second.extensions)
{
log_error("Checked for SPIR-V extension %s.n",
extension_dep.c_str());
}
return TEST_FAIL;
}
return TEST_PASS;
}

View File

@@ -108,7 +108,11 @@ REGISTER_TEST_VERSION(sub_group_dispatch, Version(2, 1))
nullptr);
test_error(error, "clGetDeviceInfo failed");
max_local = max_work_item_sizes[0];
error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
sizeof(max_local), &max_local, nullptr);
test_error(error, "clGetKernelWorkGroupInfo failed");
max_local = std::min(max_local, max_work_item_sizes[0]);
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
(void *)&platform, NULL);

View File

@@ -1,9 +1,5 @@
set(MODULE_NAME BASIC)
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
add_cxx_flag_if_supported(-Wno-narrowing)
endif()
set(${MODULE_NAME}_SOURCES
main.cpp
test_fpmath.cpp

View File

@@ -161,7 +161,7 @@ REGISTER_TEST(arraycopy)
err |= clSetKernelArg(kernel, 1, sizeof results, &results);
test_error(err, "clSetKernelArg failed");
size_t threads[3] = { num_elements, 0, 0 };
size_t threads[3] = { static_cast<size_t>(num_elements), 0, 0 };
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error(err, "clEnqueueNDRangeKernel failed");

View File

@@ -35,9 +35,9 @@ static int test_arrayimagecopy_single_format(
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
imgptr{ nullptr, free };
clMemWrapper buffer, image;
int img_width = 512;
int img_height = 512;
int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
size_t img_width = 512;
size_t img_height = 512;
size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
size_t elem_size;
size_t buffer_size;
cl_int err;

View File

@@ -14,7 +14,6 @@
// limitations under the License.
//
#include <cmath>
using std::isnan;
#include "harness/compat.h"
#include <stdio.h>
@@ -26,6 +25,7 @@ using std::isnan;
#include <CL/cl_half.h>
#include "harness/conversions.h"
#include "harness/mathHelpers.h"
#include "harness/typeWrappers.h"
extern cl_half_rounding_mode halfRoundingMode;
@@ -102,16 +102,6 @@ const char * kernel_explicit_s2v_set[NUM_VEC_TYPES][NUM_VEC_TYPES][5] = {
// clang-format on
bool IsHalfNaN(cl_half v)
{
// Extract FP16 exponent and mantissa
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = ((cl_half)v) & 0x3FF;
// NaN test
return (h_exp == 0x1F && h_mant != 0);
}
static int test_explicit_s2v_function(cl_context context,
cl_command_queue queue, cl_kernel kernel,
ExplicitType srcType, unsigned int count,
@@ -183,20 +173,21 @@ static int test_explicit_s2v_function(cl_context context,
{
bool isSrcNaN =
(((srcType == kHalf)
&& IsHalfNaN(*reinterpret_cast<cl_half *>(inPtr)))
&& isnan_fp(*reinterpret_cast<cl_half *>(inPtr)))
|| ((srcType == kFloat)
&& isnan(*reinterpret_cast<cl_float *>(inPtr)))
&& isnan_fp(*reinterpret_cast<cl_float *>(inPtr)))
|| ((srcType == kDouble)
&& isnan(*reinterpret_cast<cl_double *>(inPtr))));
bool isDestNaN = (((destType == kHalf)
&& IsHalfNaN(*reinterpret_cast<cl_half *>(
outPtr + destTypeSize * s)))
|| ((destType == kFloat)
&& isnan(*reinterpret_cast<cl_float *>(
outPtr + destTypeSize * s)))
|| ((destType == kDouble)
&& isnan(*reinterpret_cast<cl_double *>(
outPtr + destTypeSize * s))));
&& isnan_fp(*reinterpret_cast<cl_double *>(inPtr))));
bool isDestNaN =
(((destType == kHalf)
&& isnan_fp(*reinterpret_cast<cl_half *>(
outPtr + destTypeSize * s)))
|| ((destType == kFloat)
&& isnan_fp(*reinterpret_cast<cl_float *>(
outPtr + destTypeSize * s)))
|| ((destType == kDouble)
&& isnan_fp(*reinterpret_cast<cl_double *>(
outPtr + destTypeSize * s))));
if (isSrcNaN && isDestNaN)
{

View File

@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "harness/compat.h"
#include "harness/mathHelpers.h"
#include "harness/rounding_mode.h"
#include "harness/stringHelpers.h"
@@ -57,16 +58,6 @@ template <typename T> double toDouble(T val)
return val;
}
bool isHalfNan(cl_half v)
{
// Extract FP16 exponent and mantissa
uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = v & 0x3FF;
// NaN test
return (h_exp == 0x1F && h_mant != 0);
}
cl_half half_plus(cl_half a, cl_half b)
{
return HFF(std::plus<float>()(HTF(a), HTF(b)));
@@ -101,14 +92,7 @@ int verify_fp(std::vector<T> (&input)[2], std::vector<T> &output,
T r = test.ref(inA[i], inB[i]);
bool both_nan = false;
if (std::is_same<T, cl_half>::value)
{
both_nan = isHalfNan(r) && isHalfNan(output[i]);
}
else if (std::is_floating_point<T>::value)
{
both_nan = std::isnan(r) && std::isnan(output[i]);
}
both_nan = isnan_fp(r) && isnan_fp(output[i]);
// If not both nan, check if the result is the same
if (!both_nan && (r != output[i]))

View File

@@ -100,8 +100,8 @@ REGISTER_TEST(hostptr)
cl_image_format img_format;
cl_uchar *rgba8_inptr, *rgba8_outptr;
void *lock_buffer;
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
cl_int err;
MTdata d;
RoundingMode oldRoundMode;

View File

@@ -56,8 +56,9 @@ __kernel void test_if(__global int *src, __global int *dst)
int verify_if(std::vector<cl_int> input, std::vector<cl_int> output)
{
const cl_int results[] = {
0x12345678, 0x23456781, 0x34567812, 0x45678123,
0x56781234, 0x67812345, 0x78123456, 0x81234567,
(cl_int)0x12345678, (cl_int)0x23456781, (cl_int)0x34567812,
(cl_int)0x45678123, (cl_int)0x56781234, (cl_int)0x67812345,
(cl_int)0x78123456, (cl_int)0x81234567,
};
auto predicate = [&results](cl_int a, cl_int b) {

View File

@@ -144,8 +144,8 @@ verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int
REGISTER_TEST(image_multipass_integer_coord)
{
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
cl_image_format img_format;
int num_input_streams = 8;
@@ -397,8 +397,8 @@ REGISTER_TEST(image_multipass_integer_coord)
REGISTER_TEST(image_multipass_float_coord)
{
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
cl_image_format img_format;
int num_input_streams = 8;

View File

@@ -35,9 +35,9 @@ static int test_imagearraycopy_single_format(
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
imgptr{ nullptr, free };
clMemWrapper buffer, image;
const int img_width = 512;
const int img_height = 512;
const int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
const size_t img_width = 512;
const size_t img_height = 512;
const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
size_t elem_size;
size_t buffer_size;
cl_int err;

View File

@@ -111,8 +111,8 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
clMemWrapper streams[6];
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
int i, err;
MTdataHolder d(gRandomSeed);
@@ -153,7 +153,7 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
for (i = 0; i < 3; i++)
{
void *p, *outp;
int x, y, delta_w = img_width / 8, delta_h = img_height / 16;
size_t x, y, delta_w = img_width / 8, delta_h = img_height / 16;
switch (i)
{
@@ -197,10 +197,11 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
copy_origin, copy_region, 0, NULL, NULL);
if (err)
{
log_error("Copy %d (origin [%d, %d], size [%d, %d], image "
"size [%d x %d]) Failed\n",
copy_number, x, y, delta_w, delta_h, img_width,
img_height);
log_error(
"Copy %d (origin [%zu, %zu], size [%zu, %zu], image "
"size [%zu x %zu]) Failed\n",
copy_number, x, y, delta_w, delta_h, img_width,
img_height);
}
test_error(err, "clEnqueueCopyImage failed");
}

View File

@@ -115,9 +115,9 @@ static int test_imagecopy3d_impl(cl_device_id device, cl_context context,
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
clMemWrapper streams[6];
int img_width = 128;
int img_height = 128;
int img_depth = 64;
size_t img_width = 128;
size_t img_height = 128;
size_t img_depth = 64;
int i;
cl_int err;
unsigned num_elements = img_width * img_height * img_depth * 4;

View File

@@ -82,8 +82,8 @@ REGISTER_TEST(imagenpot)
cl_kernel kernel;
size_t global_threads[3], local_threads[3];
size_t local_workgroup_size;
int img_width;
int img_height;
size_t img_width;
size_t img_height;
int err;
cl_uint m;
size_t max_local_workgroup_size[3];

View File

@@ -123,15 +123,15 @@ REGISTER_TEST(imagerandomcopy)
unsigned short *rgba16_inptr, *rgba16_outptr;
float *rgbafp_inptr, *rgbafp_outptr;
clMemWrapper streams[6];
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
int i, j;
cl_int err;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
log_info("Testing with image %d x %d.\n", img_width, img_height);
log_info("Testing with image %zu x %zu.\n", img_width, img_height);
d = init_genrand( gRandomSeed );
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
@@ -191,8 +191,8 @@ REGISTER_TEST(imagerandomcopy)
}
size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1};
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL);
err = clEnqueueWriteImage(queue, streams[i * 2], CL_TRUE, origin,
region, 0, 0, p, 0, NULL, NULL);
test_error(err, "clEnqueueWriteImage failed");
for (j=0; j<NUM_COPIES; j++)

View File

@@ -195,8 +195,8 @@ REGISTER_TEST(imagereadwrite)
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
clMemWrapper streams[3];
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
int num_tries = 200;
int i, j, err;
MTdataHolder d(gRandomSeed);
@@ -242,10 +242,10 @@ REGISTER_TEST(imagereadwrite)
for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++)
{
int x = (int)get_random_float(0, img_width, d);
int y = (int)get_random_float(0, img_height, d);
int w = (int)get_random_float(1, (img_width - x), d);
int h = (int)get_random_float(1, (img_height - y), d);
size_t x = (size_t)get_random_float(0, img_width, d);
size_t y = (size_t)get_random_float(0, img_height, d);
size_t w = (size_t)get_random_float(1, (img_width - x), d);
size_t h = (size_t)get_random_float(1, (img_height - y), d);
size_t input_pitch;
int set_input_pitch = (int)(genrand_int32(d) & 0x01);
int packed_update = (int)(genrand_int32(d) & 0x01);
@@ -386,7 +386,8 @@ REGISTER_TEST(imagereadwrite)
img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
}
break;
@@ -396,7 +397,8 @@ REGISTER_TEST(imagereadwrite)
img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
}
break;
@@ -406,7 +408,8 @@ REGISTER_TEST(imagereadwrite)
img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
}
break;

View File

@@ -205,10 +205,10 @@ REGISTER_TEST(imagereadwrite3d)
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
clMemWrapper streams[3];
int img_width = 64;
int img_height = 64;
int img_depth = 32;
int img_slice = img_width * img_height;
size_t img_width = 64;
size_t img_height = 64;
size_t img_depth = 32;
size_t img_slice = img_width * img_height;
int num_tries = 30;
int i, j, err;
MTdataHolder mtData(gRandomSeed);
@@ -257,12 +257,12 @@ REGISTER_TEST(imagereadwrite3d)
for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++)
{
int x = (int)get_random_float(0, (float)img_width - 1, mtData);
int y = (int)get_random_float(0, (float)img_height - 1, mtData);
int z = (int)get_random_float(0, (float)img_depth - 1, mtData);
int w = (int)get_random_float(1, (float)(img_width - x), mtData);
int h = (int)get_random_float(1, (float)(img_height - y), mtData);
int d = (int)get_random_float(1, (float)(img_depth - z), mtData);
size_t x = (size_t)get_random_float(0, (float)img_width - 1, mtData);
size_t y = (size_t)get_random_float(0, (float)img_height - 1, mtData);
size_t z = (size_t)get_random_float(0, (float)img_depth - 1, mtData);
size_t w = (size_t)get_random_float(1, (float)(img_width - x), mtData);
size_t h = (size_t)get_random_float(1, (float)(img_height - y), mtData);
size_t d = (size_t)get_random_float(1, (float)(img_depth - z), mtData);
size_t input_pitch, input_slice_pitch;
int set_input_pitch = (int)(genrand_int32(mtData) & 0x01);
int packed_update = (int)(genrand_int32(mtData) & 0x01);
@@ -401,7 +401,10 @@ REGISTER_TEST(imagereadwrite3d)
img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
"slice_pitch=%d, try=%d\n",
x, y, z, w, h, d, (int)input_pitch,
(int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
}
break;
@@ -411,7 +414,10 @@ REGISTER_TEST(imagereadwrite3d)
img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
"slice_pitch=%d, try=%d\n",
x, y, z, w, h, d, (int)input_pitch,
(int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
}
break;
@@ -421,7 +427,10 @@ REGISTER_TEST(imagereadwrite3d)
img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
"slice_pitch=%d, try=%d\n",
x, y, z, w, h, d, (int)input_pitch,
(int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
}
break;

View File

@@ -66,7 +66,7 @@ REGISTER_TEST(kernel_call_kernel_function)
clKernelWrapper kernel1, kernel2, kernel_to_call;
clMemWrapper streams[2];
size_t threads[] = {num_elements,1,1};
size_t threads[] = { static_cast<size_t>(num_elements), 1, 1 };
cl_int *input, *output, *expected;
cl_int times = 4;
int pass = 0;

View File

@@ -117,8 +117,8 @@ REGISTER_TEST(mri_multiple)
cl_program program;
cl_kernel kernel;
size_t threads[2];
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
int i, err;
MTdata d;

View File

@@ -100,8 +100,8 @@ REGISTER_TEST(mri_one)
cl_program program;
cl_kernel kernel;
size_t threads[2];
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
int i, err;
size_t origin[3] = {0, 0, 0};
size_t region[3] = {img_width, img_height, 1};

View File

@@ -1264,7 +1264,7 @@ static int l_write_read_for_type(cl_device_id device, cl_context context,
}
cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0,
0, 0);
memset(read_data, -1, read_data_size);
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
@@ -1503,7 +1503,7 @@ static int l_init_write_read_for_type(cl_device_id device, cl_context context,
clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0,
0, 0);
memset(read_data, -1, read_data_size);
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);

View File

@@ -476,7 +476,7 @@ struct TestWorkItemFnsOutOfRange
maxWorkItemSizes[2] };
// check if maximum work group size for current dimention is not
// exceeded
cl_uint work_group_size = max_workgroup_size + 1;
size_t work_group_size = max_workgroup_size + 1;
while (max_workgroup_size < work_group_size && work_group_size != 1)
{
work_group_size = 1;
@@ -492,9 +492,9 @@ struct TestWorkItemFnsOutOfRange
// compute max number of work groups based on buffer size and max
// group size
cl_uint max_work_groups = testData.size() / work_group_size;
size_t max_work_groups = testData.size() / work_group_size;
// take into account number of dimentions
cl_uint work_groups_per_dim =
size_t work_groups_per_dim =
std::max(1, (int)pow(max_work_groups, 1.f / dim));
for (size_t j = 0; j < dim; j++)

View File

@@ -24,8 +24,9 @@
#include "CL/cl_half.h"
#include <vector>
#include <iomanip>
#include <sstream>
#include <vector>
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
#define MAX_HOST_THREADS GetThreadCount()
@@ -74,9 +75,11 @@ extern int
gMaxDeviceThreads; // maximum number of threads executed on OCL device
extern cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device
extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
extern const char *
get_memory_order_type_name(TExplicitMemoryOrderType orderType);
@@ -174,6 +177,13 @@ public:
{
return false;
}
virtual bool
IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue)
{
return expected != testValues[whichDestValue];
}
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d)
{
@@ -883,14 +893,15 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
header += std::string("__global volatile ") + aTypeName + " destMemory["
+ ss.str() + "] = {\n";
ss.str("");
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
!= TYPE_ATOMIC_HALF)
ss << _startValue;
else
== TYPE_ATOMIC_FLOAT)
ss << std::setprecision(10) << _startValue;
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
== TYPE_ATOMIC_HALF)
ss << static_cast<HostDataType>(
cl_half_to_float(static_cast<cl_half>(_startValue)));
else
ss << _startValue;
for (cl_uint i = 0; i < maxNumDestItems; i++)
{
if (aTypeName == "atomic_flag")
@@ -1449,7 +1460,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
startRefValues.size() ? &startRefValues[0] : 0, i))
break; // no expected value function provided
if (expected != destItems[i])
if (IsTestNotAsExpected(expected, destItems, i))
{
std::stringstream logLine;
logLine << "ERROR: Result " << i

View File

@@ -17,6 +17,9 @@
#define HOST_ATOMICS_H_
#include "harness/testHarness.h"
#include <mutex>
#include "CL/cl_half.h"
#ifdef WIN32
#include "Windows.h"
@@ -87,6 +90,8 @@ enum TExplicitMemoryOrderType
#define HOST_FLAG cl_int
extern cl_half_rounding_mode gHalfRoundingMode;
// host atomic functions
void host_atomic_thread_fence(TExplicitMemoryOrderType order);
@@ -94,28 +99,51 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a += c;
return old_value;
}
else
{
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
return InterlockedExchangeAdd(a, c);
return InterlockedExchangeAdd(a, c);
#elif defined(__GNUC__)
return __sync_fetch_and_add(a, c);
return __sync_fetch_and_add(a, c);
#else
log_info("Host function not implemented: atomic_fetch_add\n");
return 0;
log_info("Host function not implemented: atomic_fetch_add\n");
return 0;
#endif
}
}
template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
return InterlockedExchangeSubtract(a, c);
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
gHalfRoundingMode);
return old_value;
}
else
{
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
return InterlockedExchangeSubtract(a, c);
#elif defined(__GNUC__)
return __sync_fetch_and_sub(a, c);
return __sync_fetch_and_sub(a, c);
#else
log_info("Host function not implemented: atomic_fetch_sub\n");
return 0;
log_info("Host function not implemented: atomic_fetch_sub\n");
return 0;
#endif
}
}
template <typename AtomicType, typename CorrespondingType>
@@ -144,19 +172,34 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
TExplicitMemoryOrderType order_success,
TExplicitMemoryOrderType order_failure)
{
CorrespondingType tmp;
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
tmp = InterlockedCompareExchange(a, desired, *expected);
CorrespondingType tmp;
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile float *>(a);
if (tmp == *expected)
{
*reinterpret_cast<volatile float *>(a) = desired;
return true;
}
*expected = tmp;
}
else
{
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
tmp = InterlockedCompareExchange(a, desired, *expected);
#elif defined(__GNUC__)
tmp = __sync_val_compare_and_swap(a, *expected, desired);
tmp = __sync_val_compare_and_swap(a, *expected, desired);
#else
log_info("Host function not implemented: atomic_compare_exchange\n");
tmp = 0;
log_info("Host function not implemented: atomic_compare_exchange\n");
tmp = 0;
#endif
if(tmp == *expected)
return true;
*expected = tmp;
return false;
if (tmp == *expected) return true;
*expected = tmp;
}
return false;
}
template <typename AtomicType, typename CorrespondingType>

View File

@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
bool gFloatAtomicsSupported = false;
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device);
@@ -132,6 +133,12 @@ test_status InitCL(cl_device_id device) {
if (is_extension_available(device, "cl_ext_float_atomics"))
{
gFloatAtomicsSupported = true;
cl_int error = clGetDeviceInfo(
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
if (is_extension_available(device, "cl_khr_fp16"))
{
cl_int error = clGetDeviceInfo(

File diff suppressed because it is too large Load Diff

View File

@@ -8,7 +8,6 @@ set(VULKAN_WRAPPER_SOURCES
# needed by Vulkan wrapper to compile
set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
add_cxx_flag_if_supported(-Wmisleading-indentation)
add_cxx_flag_if_supported(-Wno-narrowing)
add_cxx_flag_if_supported(-Wno-format)
add_cxx_flag_if_supported(-Wno-error)
add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive

View File

@@ -863,7 +863,7 @@ clExternalMemoryImage::clExternalMemoryImage(
size_t clImageFormatSize;
cl_image_desc image_desc;
memset(&image_desc, 0x0, sizeof(cl_image_desc));
cl_image_format img_format = { 0 };
img_format = { 0 };
const VkImageCreateInfo VulkanImageCreateInfo =
image2D.getVkImageCreateInfo();
@@ -1233,7 +1233,7 @@ int clExternalExportableSemaphore::signal(cl_command_queue cmd_queue)
import.fd = fd;
import.pNext = nullptr;
import.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
import.flags = 0;
import.flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT;
VkResult res =
vkImportSemaphoreFdKHR(m_deviceSemaphore.getDevice(), &import);

View File

@@ -106,6 +106,7 @@ protected:
cl_mem m_externalMemory;
int fd;
void *handle;
cl_image_format img_format;
clExternalMemoryImage();
public:
@@ -117,6 +118,7 @@ public:
cl_device_id deviceId);
virtual ~clExternalMemoryImage();
cl_mem getExternalMemoryImage();
cl_image_format getImageFormat() { return img_format; };
};
class clExternalSemaphore {

View File

@@ -243,6 +243,8 @@ getSupportedVulkanExternalMemoryHandleTypeList(
VkPhysicalDeviceExternalBufferInfo buffer_info = {};
buffer_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO;
buffer_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT
| VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VkExternalBufferProperties buffer_properties = {};
buffer_properties.sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES;
@@ -307,7 +309,9 @@ getSupportedVulkanExternalSemaphoreHandleTypeList(const VulkanDevice &vkDevice)
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, nullptr,
handle_type.vk_type
};
VkExternalSemaphoreProperties query_result = {};
VkExternalSemaphoreProperties query_result = {
VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES
};
vkGetPhysicalDeviceExternalSemaphorePropertiesKHR(
vkDevice.getPhysicalDevice(), &handle_query, &query_result);
if (query_result.externalSemaphoreFeatures

View File

@@ -17,10 +17,31 @@ set(${MODULE_NAME}_SOURCES
include(../CMakeCommon.txt)
# Include the relative paths to SPV assembly files
configure_file(spirv_asm_list.txt ${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt)
include(${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt)
# Determine the corresponding binary outputs to the SPV assembly input files
set(COMPILER_ASM_REL_PATH spirv_asm)
set(COMPILER_ASM_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${COMPILER_ASM_REL_PATH}")
set(COMPILER_SPV_PATH "${CMAKE_CURRENT_BINARY_DIR}/spirv_bin")
# Copy the required test include directories into the build directory.
if(NOT DEFINED COMPILER_TEST_RESOURCES)
set(COMPILER_TEST_RESOURCES $<TARGET_FILE_DIR:${${MODULE_NAME}_OUT}>)
endif()
set(COMPILER_SPV_EXTRA "")
if(SPIRV_TOOLS_DIR AND IS_ABSOLUTE "${SPIRV_TOOLS_DIR}" AND
IS_DIRECTORY "${SPIRV_TOOLS_DIR}")
message("Using SPIR-V tools from '${SPIRV_TOOLS_DIR}'")
set(COMPILER_SPV_EXTRA "--assembler=${SPIRV_TOOLS_DIR}/spirv-as" "--validator=${SPIRV_TOOLS_DIR}/spirv-val")
endif()
set(COMPILER_ASSEMBLY_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../spirv_new/spirv_asm/assemble_spirv.py)
include(CMakePrintHelpers)
cmake_print_variables(COMPILER_ASSEMBLY_SCRIPT)
add_custom_command(
COMMENT "Copying compiler test resources..."
TARGET ${${MODULE_NAME}_OUT}
@@ -30,7 +51,10 @@ add_custom_command(
${COMPILER_TEST_RESOURCES}/includeTestDirectory
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory)
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory
COMMAND ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose
DEPENDS ${COMPILER_ASSEMBLY_SCRIPT} ${COMPILER_ASM}
VERBATIM)
include(GNUInstallDirs)

View File

@@ -0,0 +1,4 @@
set(COMPILER_SPIRV_NEW_ASM
compiler_spirv_asm/write_kernel.spvasm32
compiler_spirv_asm/write_kernel.spvasm64
)

View File

@@ -14,6 +14,9 @@
// limitations under the License.
//
#include "testBase.h"
#include <filesystem>
#if defined(_WIN32)
#include <time.h>
#elif defined(__linux__) || defined(__APPLE__)
@@ -3020,15 +3023,6 @@ REGISTER_TEST(execute_after_embedded_header_link)
return 0;
}
#if defined(__APPLE__) || defined(__linux)
#define _mkdir(x) mkdir(x, S_IRWXU)
#define _chdir chdir
#define _rmdir rmdir
#define _unlink unlink
#else
#include <direct.h>
#endif
REGISTER_TEST(execute_after_included_header_link)
{
int error;
@@ -3047,100 +3041,60 @@ REGISTER_TEST(execute_after_included_header_link)
}
/* setup */
#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
/* Some tests systems doesn't allow one to write in the test directory */
if (_chdir("/tmp") != 0)
std::error_code ec;
auto temp_dir_path = std::filesystem::temp_directory_path(ec);
if (ec)
{
log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n",
__FILE__, __LINE__);
log_error("ERROR: Unable to get the temporary directory path\n");
return -1;
}
#endif
if (_mkdir("foo") != 0)
temp_dir_path = temp_dir_path / "foo" / "bar";
std::filesystem::create_directories(temp_dir_path, ec);
if (ec)
{
log_error("ERROR: Unable to create directory foo! (in %s:%d)\n",
__FILE__, __LINE__);
log_error("ERROR: Unable to create directory: %s, error: %d (%s)\n",
temp_dir_path.u8string().c_str(), ec.value(),
ec.message().c_str());
return -1;
}
if (_mkdir("foo/bar") != 0)
{
log_error("ERROR: Unable to create directory foo/bar! (in %s:%d)\n",
__FILE__, __LINE__);
return -1;
}
if (_chdir("foo/bar") != 0)
{
log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n",
__FILE__, __LINE__);
return -1;
}
FILE *simple_header_file = fopen(simple_header_name, "w");
const auto simple_header_path = temp_dir_path / simple_header_name;
FILE *simple_header_file =
fopen(simple_header_path.u8string().c_str(), "w");
if (simple_header_file == NULL)
{
log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n",
simple_header_name, __FILE__, __LINE__);
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
return -1;
}
if (fprintf(simple_header_file, "%s", simple_header) < 0)
{
log_error(
"ERROR: Unable to write to simple header file %s! (in %s:%d)\n",
simple_header_name, __FILE__, __LINE__);
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
return -1;
}
if (fclose(simple_header_file) != 0)
{
log_error("ERROR: Unable to close simple header file %s! (in %s:%d)\n",
simple_header_name, __FILE__, __LINE__);
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
return -1;
}
if (_chdir("../..") != 0)
{
log_error("ERROR: Unable to change to original working directory! (in "
"%s:%d)\n",
__FILE__, __LINE__);
return -1;
}
#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
error = clCompileProgram(program, 1, &device, "-I/tmp/foo/bar", 0, NULL,
const std::string include_path =
std::string("-I") + temp_dir_path.generic_u8string();
error = clCompileProgram(program, 1, &device, include_path.c_str(), 0, NULL,
NULL, NULL, NULL);
#else
error = clCompileProgram(program, 1, &device, "-Ifoo/bar", 0, NULL, NULL,
NULL, NULL);
#endif
test_error(error,
"Unable to compile a simple program with included header");
/* cleanup */
if (_chdir("foo/bar") != 0)
std::filesystem::remove_all(temp_dir_path, ec);
if (ec)
{
log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n",
__FILE__, __LINE__);
return -1;
}
if (_unlink(simple_header_name) != 0)
{
log_error("ERROR: Unable to remove simple header file %s! (in %s:%d)\n",
simple_header_name, __FILE__, __LINE__);
return -1;
}
if (_chdir("../..") != 0)
{
log_error("ERROR: Unable to change to original working directory! (in "
"%s:%d)\n",
__FILE__, __LINE__);
return -1;
}
if (_rmdir("foo/bar") != 0)
{
log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n",
__FILE__, __LINE__);
return -1;
}
if (_rmdir("foo") != 0)
{
log_error("ERROR: Unable to remove directory foo! (in %s:%d)\n",
__FILE__, __LINE__);
log_error("ERROR: Unable to delete directory: %s, error: %d (%s)",
temp_dir_path.u8string().c_str(), ec.value(),
ec.message().c_str());
return -1;
}

View File

@@ -95,7 +95,9 @@ const char *known_extensions[] = {
"cl_khr_command_buffer",
"cl_khr_command_buffer_mutable_dispatch",
"cl_khr_command_buffer_multi_device",
"cl_khr_external_memory_android_hardware_buffer"
"cl_khr_external_memory_android_hardware_buffer",
"cl_khr_unified_svm",
"cl_khr_spirv_queries"
};
// clang-format on

View File

@@ -656,6 +656,32 @@ static int test_feature_macro_integer_dot_product_input_4x8bit(
compiler_status, supported);
}
static int test_feature_macro_ext_image_unorm_int_2_101010(
cl_device_id deviceID, cl_context context, std::string test_macro_name,
cl_bool& supported)
{
cl_int error = TEST_FAIL;
cl_bool api_status = CL_TRUE;
cl_bool compiler_status;
log_info("\n%s ...\n", test_macro_name.c_str());
if (!is_extension_available(deviceID, "cl_ext_image_unorm_int_2_101010"))
{
supported = false;
return TEST_PASS;
}
error = check_compiler_feature_info(deviceID, context, test_macro_name,
compiler_status);
if (error != CL_SUCCESS)
{
return error;
}
return feature_macro_verify_results(test_macro_name, api_status,
compiler_status, supported);
}
static int test_feature_macro_int64(cl_device_id deviceID, cl_context context,
std::string test_macro_name,
cl_bool& supported)
@@ -833,6 +859,7 @@ REGISTER_TEST_VERSION(features_macro, Version(3, 0))
NEW_FEATURE_MACRO_TEST(int64);
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit);
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit_packed);
NEW_FEATURE_MACRO_TEST(ext_image_unorm_int_2_101010);
error |= test_consistency_c_features_list(device, supported_features_vec);

View File

@@ -25,6 +25,43 @@
#include <string>
#include <thread>
#include <vector>
#include <fstream>
#if defined(_WIN32)
const std::string slash = "\\";
#else
const std::string slash = "/";
#endif
std::string compilerSpvBinaries = "test_conformance" + slash + "compiler"
+ slash + "spirv_bin" + slash + "write_kernel.spv";
const std::string spvExt = ".spv";
std::vector<unsigned char> readBinary(const char *file_name)
{
using namespace std;
ifstream file(file_name, ios::in | ios::binary | ios::ate);
std::vector<char> tmpBuffer(0);
if (file.is_open())
{
size_t size = file.tellg();
tmpBuffer.resize(size);
file.seekg(0, ios::beg);
file.read(&tmpBuffer[0], size);
file.close();
}
else
{
log_error("File %s not found\n", file_name);
}
std::vector<unsigned char> result(tmpBuffer.begin(), tmpBuffer.end());
return result;
}
namespace {
@@ -299,18 +336,12 @@ public:
throw unload_test_failure("Failure getting device address bits");
}
switch (address_bits)
{
case 32:
m_spirv_binary = write_kernel_32_spv.data();
m_spirv_size = write_kernel_32_spv.size();
break;
case 64:
m_spirv_binary = write_kernel_64_spv.data();
m_spirv_size = write_kernel_64_spv.size();
break;
default: throw unload_test_failure("Invalid address bits");
}
std::vector<unsigned char> kernel_buffer;
std::string file_name =
compilerSpvBinaries + std::to_string(address_bits);
m_spirv_binary = readBinary(file_name.c_str());
m_spirv_size = m_spirv_binary.size();
}
void create() final
@@ -320,7 +351,7 @@ public:
assert(nullptr == m_program);
cl_int err = CL_INVALID_PLATFORM;
m_program = m_CreateProgramWithIL(m_context, m_spirv_binary,
m_program = m_CreateProgramWithIL(m_context, &m_spirv_binary[0],
m_spirv_size, &err);
if (CL_SUCCESS != err)
throw unload_test_failure("clCreateProgramWithIL()", err);
@@ -347,7 +378,7 @@ public:
}
private:
void *m_spirv_binary;
std::vector<unsigned char> m_spirv_binary;
size_t m_spirv_size;
bool m_enabled;

View File

@@ -4,47 +4,3 @@ static const char write_kernel_source[] = R"(
kernel void write_kernel(global unsigned int *p) {
*p = 42;
})";
/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm64 */
static std::array<unsigned char, 216> write_kernel_64_spv{
{ 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
};
/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm32 */
static std::array<unsigned char, 216> write_kernel_32_spv{
{ 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
};

View File

@@ -284,6 +284,11 @@ int main( int argc, const char **argv )
static int ParseArgs( int argc, const char **argv )
{
if (gListTests)
{
return 0;
}
gArgList = (const char **)calloc( argc, sizeof( char*) );
if( NULL == gArgList )

View File

@@ -1,9 +1,5 @@
set(MODULE_NAME CONVERSIONS)
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
add_cxx_flag_if_supported(-Wno-narrowing)
endif()
set (${MODULE_NAME}_SOURCES
Sleep.cpp test_conversions.cpp basic_test_conversions.cpp
)

View File

@@ -13,9 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/mathHelpers.h"
#include "harness/testHarness.h"
#include "harness/compat.h"
#include "harness/ThreadPool.h"
#include "harness/parseParameters.h"
#if defined(__APPLE__)
#include <sys/sysctl.h>
@@ -52,17 +54,17 @@
#include "basic_test_conversions.h"
#if defined(_WIN32)
#if defined(_M_IX86) || defined(_M_X64)
#include <mmintrin.h>
#include <emmintrin.h>
#else // !_WIN32
#else
#if defined(__SSE__)
#include <xmmintrin.h>
#endif
#if defined(__SSE2__)
#include <emmintrin.h>
#endif
#endif // _WIN32
#endif
cl_context gContext = NULL;
cl_command_queue gQueue = NULL;
@@ -76,7 +78,6 @@ cl_mem gInBuffer;
cl_mem gOutBuffers[kCallStyleCount];
size_t gComputeDevices = 0;
uint32_t gDeviceFrequency = 0;
int gWimpyMode = 0;
int gWimpyReductionFactor = 128;
int gSkipTesting = 0;
int gForceFTZ = 0;
@@ -955,24 +956,6 @@ void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
// destroyed automatically soon after we exit.
}
template <typename T> static bool isnan_fp(const T &v)
{
if (std::is_same<T, cl_half>::value)
{
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = ((cl_half)v) & 0x3FF;
return (h_exp == 0x1F && h_mant != 0);
}
else
{
#if !defined(_WIN32)
return std::isnan(v);
#else
return _isnan(v);
#endif
}
}
template <typename InType>
void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType, void *input)
{

View File

@@ -80,7 +80,6 @@ extern int gHasDouble;
extern int gTestDouble;
extern int gHasHalfs;
extern int gTestHalfs;
extern int gWimpyMode;
extern int gWimpyReductionFactor;
extern int gSkipTesting;
extern int gMinVectorSize;

View File

@@ -343,7 +343,7 @@ float DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int(float f)
volatile float x = f;
float magicVal = magic[f < 0];
#if defined(__SSE__)
#if defined(__SSE__) || _M_IX86_FP == 1
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
// incorrectly
__m128 v = _mm_set_ss(x);
@@ -376,7 +376,7 @@ DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int_and_clamp(double f)
{
volatile double x = f;
double magicVal = magic[f < 0];
#if defined(__SSE2__) || defined(_MSC_VER)
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
// incorrectly
__m128d v = _mm_set_sd(x);
@@ -479,7 +479,7 @@ void DataInfoSpec<InType, OutType, InFP, OutFP>::conv(OutType *out, InType *in)
{
if (std::is_same<cl_double, OutType>::value)
{
#if defined(_MSC_VER)
#if defined(_M_IX86) || defined(_M_X64)
double result;
if (std::is_same<cl_ulong, InType>::value)

View File

@@ -112,6 +112,35 @@ int main(int argc, const char **argv)
int error;
argc = parseCustomParam(argc, argv);
if (gListTests)
{
for (unsigned dst = 0; dst < kTypeCount; dst++)
{
for (unsigned src = 0; src < kTypeCount; src++)
{
for (unsigned sat = 0; sat < 2; sat++)
{
// skip illegal saturated conversions to float type
if (gSaturationNames[sat] == std::string("_sat")
&& (gTypeNames[dst] == std::string("float")
|| gTypeNames[dst] == std::string("half")
|| gTypeNames[dst] == std::string("double")))
{
continue;
}
for (unsigned rnd = 0; rnd < kRoundingModeCount; rnd++)
{
vlog("\t%s\n",
(std::string(gTypeNames[dst])
+ gSaturationNames[sat] + gRoundingModeNames[rnd]
+ "_" + gTypeNames[src])
.c_str());
}
}
}
}
return 0;
}
if (argc == -1)
{
return 1;
@@ -218,7 +247,6 @@ static int ParseArgs(int argc, const char **argv)
case 'h': gTestHalfs ^= 1; break;
case 'l': gSkipTesting ^= 1; break;
case 'm': gMultithread ^= 1; break;
case 'w': gWimpyMode ^= 1; break;
case '[':
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
break;
@@ -287,14 +315,6 @@ static int ParseArgs(int argc, const char **argv)
}
}
// Check for the wimpy mode environment variable
if (getenv("CL_WIMPY_MODE"))
{
vlog("\n");
vlog("*** Detected CL_WIMPY_MODE env ***\n");
gWimpyMode = 1;
}
vlog("\n");
PrintArch();
@@ -335,9 +355,6 @@ static void PrintUsage(void)
vlog("\t\t-l\tToggle link check mode. When on, testing is skipped, and we "
"just check to see that the kernels build. (Off by default.)\n");
vlog("\t\t-m\tToggle Multithreading. (On by default.)\n");
vlog("\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very "
"small subset of the tests for each fn. NOT A VALID TEST! (Off by "
"default.)\n");
vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
"1-12, default factor(%u)\n",
gWimpyReductionFactor);

View File

@@ -1,22 +1,4 @@
if(WIN32)
set(D3D10_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include)
if(${ARCH} STREQUAL "i686")
set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86)
endif(${ARCH} STREQUAL "i686")
if(${ARCH} STREQUAL "x86_64")
set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64)
endif(${ARCH} STREQUAL "x86_64")
list(APPEND CLConform_INCLUDE_DIR ${D3D10_INCLUDE_DIR})
include_directories (${CLConform_SOURCE_DIR}/test_common/harness
${CLConform_INCLUDE_DIR} )
link_directories(${CL_LIB_DIR}, ${D3D10_LIB_DIR})
list(APPEND CLConform_LIBRARIES d3d10 dxgi)
set(MODULE_NAME D3D10)
set(${MODULE_NAME}_SOURCES
@@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES
harness.cpp
)
set_source_files_properties(
${MODULE_NAME}_SOURCES
PROPERTIES LANGUAGE CXX)
list(APPEND CLConform_LIBRARIES d3d10 dxgi)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include(../CMakeCommon.txt)
endif(WIN32)
else()
message(STATUS "D3D10 tests are only supported on Windows.")
endif()

View File

@@ -1,22 +1,4 @@
if(WIN32)
set(D3D11_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include)
if(${ARCH} STREQUAL "i686")
set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86)
endif(${ARCH} STREQUAL "i686")
if(${ARCH} STREQUAL "x86_64")
set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64)
endif(${ARCH} STREQUAL "x86_64")
list(APPEND CLConform_INCLUDE_DIR ${D3D11_INCLUDE_DIR})
include_directories (${CLConform_SOURCE_DIR}/test_common/harness
${CLConform_INCLUDE_DIR} )
link_directories(${CL_LIB_DIR}, ${D3D11_LIB_DIR})
list(APPEND CLConform_LIBRARIES d3d11 dxgi)
set(MODULE_NAME D3D11)
set(${MODULE_NAME}_SOURCES
@@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES
harness.cpp
)
set_source_files_properties(
${MODULE_NAME}_SOURCES
PROPERTIES LANGUAGE CXX)
list(APPEND CLConform_LIBRARIES d3d11 dxgi)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include(../CMakeCommon.txt)
endif(WIN32)
else()
message(STATUS "D3D11 tests are only supported on Windows.")
endif()

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -25,7 +26,6 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
// clang-format off
static const char* enqueue_simple_block[] = { R"(

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -25,7 +26,6 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
#define BITS_DEPTH 28
static const char* enqueue_flags_wait_kernel_simple[] =

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -24,10 +25,7 @@
#include <time.h>
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
static const char enqueue_block_multi_queue[] =
NL "#define BLOCK_COMPLETED 0"
NL "#define BLOCK_SUBMITTED 1"

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <algorithm>
#include <vector>
@@ -26,7 +27,6 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
static const char *helper_ndrange_1d_glo[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -25,7 +26,6 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
static int nestingLevel = 3;
static const char* enqueue_1D_wg_size_single[] =

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -25,7 +26,6 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
static const char* multi_queue_simple_block1[] =
{
NL, "void block_fn(size_t tid, int mul, __global int* res)"

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <algorithm>
#include <vector>
@@ -24,8 +25,6 @@
#include "utils.h"
#include <time.h>
extern int gWimpyMode;
#ifdef CL_VERSION_2_0
static const char* enqueue_block_first_kernel[] =

View File

@@ -25,7 +25,6 @@
#include "utils.h"
std::string gKernelName;
int gWimpyMode = 0;
test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device);
@@ -71,11 +70,6 @@ int main(int argc, const char *argv[])
gKernelName = std::string(argv[i + 1]);
argsRemoveNum += 2;
}
if (strcmp(argv[i], "-w") == 0 ){
gWimpyMode = 1;
argsRemoveNum += 1;
}
if (argsRemoveNum > 0) {
for (int j = i; j < (argc - argsRemoveNum); ++j)

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -27,7 +28,6 @@
#ifdef CL_VERSION_2_0
static int gNestingLevel = 4;
extern int gWimpyMode;
static const char* enqueue_nested_blocks_single[] =
{

View File

@@ -5,6 +5,9 @@
add_subdirectory( cl_ext_cxx_for_opencl )
add_subdirectory( cl_khr_command_buffer )
add_subdirectory( cl_khr_dx9_media_sharing )
if(ANDROID_PLATFORM GREATER 28)
add_subdirectory( cl_khr_external_memory_ahb )
endif ()
add_subdirectory( cl_khr_external_memory_dma_buf )
add_subdirectory( cl_khr_semaphore )
add_subdirectory( cl_khr_kernel_clock )

View File

@@ -435,3 +435,40 @@ bool InterleavedEnqueueTest::Skip()
{
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
}
cl_int EnqueueAndReleaseTest::Run()
{
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
cl_int pattern = 42;
error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, nullptr,
nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
// Calls release on cl_command_buffer_khr handle inside wrapper class, and
// sets the handle to nullptr, so that release doesn't get called again at
// end of test when wrapper object is destroyed.
command_buffer.reset();
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
}
return CL_SUCCESS;
}

View File

@@ -128,6 +128,15 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest
bool Skip() override;
};
// Test releasing a command-buffer after it has been submitted for execution,
// but before the user has waited on completion of the enqueue.
struct EnqueueAndReleaseTest : public BasicCommandBufferTest
{
using BasicCommandBufferTest::BasicCommandBufferTest;
cl_int Run() override;
};
template <class T>
int MakeAndRunTest(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)

View File

@@ -44,3 +44,9 @@ REGISTER_TEST(explicit_flush)
return MakeAndRunTest<ExplicitFlushTest>(device, context, queue,
num_elements);
}
REGISTER_TEST(enqueue_and_release)
{
return MakeAndRunTest<EnqueueAndReleaseTest>(device, context, queue,
num_elements);
}

View File

@@ -14,6 +14,8 @@ set(${MODULE_NAME}_SOURCES
mutable_command_multiple_dispatches.cpp
mutable_command_iterative_arg_update.cpp
mutable_command_work_groups.cpp
mutable_command_work_dim.cpp
mutable_command_update_state.cpp
../basic_command_buffer.cpp
)

View File

@@ -135,7 +135,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest
for (size_t i = 0; i < num_elements; i++)
if (i >= update_global_size && global_work_size != resultData[i])
{
log_error("Data failed to verify: update_global_size != "
log_error("Data failed to verify: global_work_size != "
"resultData[%zu]=%d\n",
i, resultData[i]);
return TEST_FAIL;
@@ -154,7 +154,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest
size_t info_global_size = 0;
const size_t update_global_size = 3;
const size_t sizeToAllocate = global_work_size;
const size_t sizeToAllocate = global_work_size * sizeof(cl_int);
const size_t num_elements = sizeToAllocate / sizeof(cl_int);
cl_mutable_command_khr command = nullptr;
};

View File

@@ -116,26 +116,6 @@ struct PropertiesArray : public InfoMutableCommandBufferTest
: InfoMutableCommandBufferTest(device, context, queue)
{}
virtual bool Skip() override
{
Version device_version = get_device_cl_version(device);
if ((device_version >= Version(3, 0))
|| is_extension_available(device, "cl_khr_extended_versioning"))
{
cl_version extension_version = get_extension_version(
device, "cl_khr_command_buffer_mutable_dispatch");
if (extension_version != CL_MAKE_VERSION(0, 9, 3))
{
log_info("cl_khr_command_buffer_mutable_dispatch version 0.9.3 "
"is required to run the test, skipping.\n ");
return true;
}
}
return InfoMutableCommandBufferTest::Skip();
}
cl_int Run() override
{
cl_command_properties_khr props[] = {

View File

@@ -297,6 +297,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
{
cl_int offset;
std::vector<cl_int> output_buffer;
std::vector<cl_int> updated_output_buffer;
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
clEventWrapper wait_events[3];
};
@@ -337,6 +338,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
// Retain new output memory object until the end of the test.
retained_output_buffers.push_back(new_out_mem);
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem };
@@ -373,7 +376,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 1,
pd.updated_output_buffer.data(), 1,
&pd.wait_events[2], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
@@ -388,8 +391,10 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ 0, std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements) }
{ 0, std::vector<cl_int>(num_elements),
std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements),
std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
@@ -407,13 +412,26 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
test_error(error, "clFinish failed");
// verify the result buffers
for (auto&& pass : simul_passes)
auto& first_pass_output = simul_passes[0].output_buffer;
auto& first_pass_updated_output = simul_passes[0].updated_output_buffer;
auto& second_pass_output = simul_passes[1].output_buffer;
auto& second_pass_updated_output =
simul_passes[1].updated_output_buffer;
for (size_t i = 0; i < num_elements; i++)
{
auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i);
}
// First pass:
// Before updating, out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_output[i], i);
// After updating, new_out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_updated_output[i],
i);
// Second pass:
// Before updating, out_mem is filled with overwritten_pattern
CHECK_VERIFICATION_ERROR(overwritten_pattern, second_pass_output[i],
i);
// After updating, new_out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, second_pass_updated_output[i],
i);
}
return CL_SUCCESS;
@@ -429,6 +447,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
clKernelWrapper kernel_fill;
clProgramWrapper program_fill;
std::vector<clMemWrapper> retained_output_buffers;
const size_t test_global_work_size = 3 * sizeof(cl_int);
const cl_int pattern_pri = 42;

View File

@@ -0,0 +1,280 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testHarness.h"
#include "mutable_command_basic.h"
#include <CL/cl_ext.h>
#include <vector>
namespace {
////////////////////////////////////////////////////////////////////////////////
// Tests related to ensuring the state of the updated command-buffer is expected
// and the effects of operations on it don't have side effects on other objects.
//
// - Tests the updates applied to a command-buffer persist over all subsequent
// enqueues.
// - Tests interaction of `clSetKernelArg` with mutable-dispatch extension.
struct MutableDispatchUpdateStateTest : public BasicMutableCommandBufferTest
{
MutableDispatchUpdateStateTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
buffer(nullptr), command(nullptr)
{}
bool Skip() override
{
if (BasicMutableCommandBufferTest::Skip()) return true;
cl_mutable_dispatch_fields_khr mutable_capabilities;
bool mutable_support =
!clGetDeviceInfo(
device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
return !mutable_support;
}
cl_int SetUpKernelArgs() override
{
cl_int error = CL_SUCCESS;
buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
num_elements * sizeof(cl_int), nullptr, &error);
test_error(error, "clCreateBuffer error");
// Zero initialize buffer
const cl_int zero_pattern = 0;
error = clEnqueueFillBuffer(
queue, buffer, &zero_pattern, sizeof(cl_int), 0,
num_elements * sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
test_error(error, "Unable to set kernel argument 0");
return CL_SUCCESS;
}
cl_int SetUpKernel() override
{
const char *add_kernel =
R"(
__kernel void add_kernel(__global int *data, int value)
{
size_t tid = get_global_id(0);
data[tid] += value;
})";
cl_int error = create_single_kernel_helper(
context, &program, &kernel, 1, &add_kernel, "add_kernel");
test_error(error, "Creating kernel failed");
return CL_SUCCESS;
}
bool verify_result(cl_int ref)
{
std::vector<cl_int> data(num_elements);
cl_int error =
clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(),
data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
if (data[i] != ref)
{
log_error("Modified verification failed at index %zu: Got %d, "
"wanted %d\n",
i, data[i], ref);
return false;
}
}
return true;
}
clMemWrapper buffer;
cl_mutable_command_khr command;
};
struct MutableDispatchUpdatesPersistTest : public MutableDispatchUpdateStateTest
{
MutableDispatchUpdatesPersistTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: MutableDispatchUpdateStateTest(device, context, queue)
{}
cl_int Run() override
{
const cl_int original_val = 42;
cl_int error =
clSetKernelArg(kernel, 1, sizeof(original_val), &original_val);
test_error(error, "Unable to set kernel argument 1");
cl_command_properties_khr props[] = {
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
};
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
// Modify the command buffer before executing
const cl_int new_command_val = 5;
cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val),
&new_command_val };
cl_mutable_dispatch_config_khr dispatch_config{
command,
1 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
&arg /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void *configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
const unsigned iterations = 5;
for (unsigned i = 0; i < iterations; i++)
{
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
}
// Check the results execution sequence is the clEnqueueNDRangeKernel
// value + the updated command-buffer value, not using the original
// command value in the operation.
constexpr cl_int ref = iterations * new_command_val;
return verify_result(ref) ? TEST_PASS : TEST_FAIL;
}
};
struct MutableDispatchSetKernelArgTest : public MutableDispatchUpdateStateTest
{
MutableDispatchSetKernelArgTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: MutableDispatchUpdateStateTest(device, context, queue)
{}
cl_int Run() override
{
const cl_int original_val = 42;
cl_int error =
clSetKernelArg(kernel, 1, sizeof(original_val), &original_val);
test_error(error, "Unable to set kernel argument 1");
cl_command_properties_khr props[] = {
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
};
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
// Set new kernel argument for later clEnqueueNDRangeKernel
const cl_int new_eager_val = 10;
error =
clSetKernelArg(kernel, 1, sizeof(new_eager_val), &new_eager_val);
test_error(error, "Unable to set kernel argument 1");
// Modify the command buffer before executing
const cl_int new_command_val = 5;
cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val),
&new_command_val };
cl_mutable_dispatch_config_khr dispatch_config{
command,
1 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
&arg /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void *configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
// Eager kernel enqueue, followed by command-buffer enqueue
error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
// Check the results execution sequence is the clEnqueueNDRangeKernel
// value + the updated command-buffer value, not using the original
// command value in the operation.
constexpr cl_int ref = new_eager_val + new_command_val;
return verify_result(ref) ? TEST_PASS : TEST_FAIL;
}
};
}
REGISTER_TEST(mutable_dispatch_updates_persist)
{
return MakeAndRunTest<MutableDispatchUpdatesPersistTest>(
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_set_kernel_arg)
{
return MakeAndRunTest<MutableDispatchSetKernelArgTest>(device, context,
queue, num_elements);
}

View File

@@ -0,0 +1,225 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <extensionHelpers.h>
#include "mutable_command_basic.h"
#include <array>
#include <cstring>
#include <CL/cl_ext.h>
// mutable dispatch tests setting `work_dim` to the original 3D value
// behaves as expected.
struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest
{
using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
MutableDispatchWorkDim(cl_device_id device, cl_context context,
cl_command_queue queue)
: InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int SetUp(int elements) override
{
result_data.resize(update_total_elements);
return InfoMutableCommandBufferTest::SetUp(elements);
}
bool Skip() override
{
cl_mutable_dispatch_fields_khr mutable_capabilities;
bool mutable_support =
!clGetDeviceInfo(
device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& (mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR);
return !mutable_support || InfoMutableCommandBufferTest::Skip();
}
bool Verify(cl_mem buffer, cl_uint expected_value, size_t total_elements)
{
std::memset(result_data.data(), 0, alloc_size);
cl_int error =
clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, alloc_size,
result_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < total_elements; i++)
{
if (result_data[i] != expected_value)
{
log_error("Data failed to verify at index %zu. "
"Expected %u, result was %u\n",
i, expected_value, result_data[i]);
return false;
}
}
return true;
}
cl_int Run() override
{
const char *global_size_kernel =
R"(
__kernel void three_dim(__global uint *dst0,
__global uint *dst1,
__global uint *dst2)
{
size_t gid = get_global_linear_id();
dst0[gid] = get_global_size(0);
dst1[gid] = get_global_size(1);
dst2[gid] = get_global_size(2);
})";
cl_int error = create_single_kernel_helper(
context, &program, &kernel, 1, &global_size_kernel, "three_dim");
test_error(error, "Creating kernel failed");
// Create a buffer for each of the three dimensions to write the
// global size into.
clMemWrapper stream1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
alloc_size, nullptr, &error);
test_error(error, "Creating test array failed");
clMemWrapper stream2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
alloc_size, nullptr, &error);
test_error(error, "Creating test array failed");
clMemWrapper stream3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
alloc_size, nullptr, &error);
test_error(error, "Creating test array failed");
// Set the arguments
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream1);
test_error(error, "Unable to set indexed kernel arguments");
error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream2);
test_error(error, "Unable to set indexed kernel arguments");
error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &stream3);
test_error(error, "Unable to set indexed kernel arguments");
// Command-buffer contains a single kernel
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, work_dim, nullptr,
global_size_3D.data(), nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
// Enqueue command-buffer and wait on completion
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clFinish(queue);
test_error(error, "clFinish failed.");
// Verify results before any update
if (!Verify(stream1, global_size_3D[0], original_total_elements))
{
return TEST_FAIL;
}
if (!Verify(stream2, global_size_3D[1], original_total_elements))
{
return TEST_FAIL;
}
if (!Verify(stream3, global_size_3D[2], original_total_elements))
{
return TEST_FAIL;
}
// Update command with a mutable config where we use a different 3D
// global size, but hardcode `work_dim` to 3 (the original value).
cl_mutable_dispatch_config_khr dispatch_config{
command,
0 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
work_dim /* work_dim */,
nullptr /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
update_global_size_3D.data() /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void *configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
// Enqueue updated command-buffer
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
// Verify update is reflected in buffer output.
if (!Verify(stream1, update_global_size_3D[0], update_total_elements))
{
return TEST_FAIL;
}
if (!Verify(stream2, update_global_size_3D[1], update_total_elements))
{
return TEST_FAIL;
}
if (!Verify(stream3, update_global_size_3D[2], update_total_elements))
{
return TEST_FAIL;
}
return CL_SUCCESS;
}
static const cl_uint work_dim = 3;
// 3D global size of kernel command when created
static const size_t original_elements = 2;
static constexpr std::array<size_t, work_dim> global_size_3D = {
original_elements, original_elements, original_elements
};
// 3D global size to update kernel command to.
static const size_t update_elements = 4;
static constexpr std::array<size_t, work_dim> update_global_size_3D = {
update_elements, update_elements, update_elements
};
// Total number of work items in original and updated grids
static const size_t original_total_elements =
original_elements * original_elements * original_elements;
static const size_t update_total_elements =
update_elements * update_elements * update_elements;
// Size in bytes of each of the 3 cl_mem buffers (using the larger size)
static const size_t alloc_size = update_total_elements * sizeof(cl_uint);
cl_mutable_command_khr command = nullptr;
std::vector<cl_uint> result_data;
};
// get_global_linear() used in kernel is an OpenCL 2.0 API
REGISTER_TEST_VERSION(mutable_dispatch_work_dim, Version(2, 0))
{
return MakeAndRunTest<MutableDispatchWorkDim>(device, context, queue,
num_elements);
}

View File

@@ -250,9 +250,6 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
&trigger_event, &execute_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
// verify pending state
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
// execute command buffer
cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE);

View File

@@ -124,8 +124,6 @@ struct EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState
error = EnqueueCommandBuffer();
test_error(error, "EnqueueCommandBuffer failed");
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
test_error(error, "State is not Pending");
return CL_SUCCESS;
}

View File

@@ -89,8 +89,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
error = EnqueueCommandBuffer();
test_error(error, "EnqueueCommandBuffer failed");
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
test_error(error, "State is not Pending");
error = clFinalizeCommandBufferKHR(command_buffer);
test_failure_error_ret(error, CL_INVALID_OPERATION,

View File

@@ -0,0 +1,12 @@
set(MODULE_NAME CL_KHR_EXTERNAL_MEMORY_AHB)
set(${MODULE_NAME}_SOURCES
main.cpp
test_ahb.cpp
test_ahb_negative.cpp
debug_ahb.cpp
)
link_libraries(OpenCL nativewindow)
include(../../CMakeCommon.txt)

View File

@@ -0,0 +1,193 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "debug_ahb.h"
constexpr AHardwareBuffer_UsageFlags flag_list[] = {
AHARDWAREBUFFER_USAGE_CPU_READ_RARELY,
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN,
AHARDWAREBUFFER_USAGE_CPU_WRITE_NEVER,
AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY,
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN,
AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK,
AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE,
AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER,
AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY,
AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT,
AHARDWAREBUFFER_USAGE_VIDEO_ENCODE,
AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA,
AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER,
AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP,
AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE,
AHARDWAREBUFFER_USAGE_FRONT_BUFFER,
};
std::string
ahardwareBufferDecodeUsageFlagsToString(const AHardwareBuffer_UsageFlags flags)
{
if (flags == 0)
{
return "UNKNOWN FLAG";
}
std::vector<std::string> active_flags;
for (const auto flag : flag_list)
{
if (flag & flags)
{
active_flags.push_back(ahardwareBufferUsageFlagToString(flag));
}
}
if (active_flags.empty())
{
return "UNKNOWN FLAG";
}
return std::accumulate(active_flags.begin() + 1, active_flags.end(),
active_flags.front(),
[](std::string acc, const std::string& flag) {
return std::move(acc) + "|" + flag;
});
}
std::string
ahardwareBufferUsageFlagToString(const AHardwareBuffer_UsageFlags flag)
{
std::string result;
switch (flag)
{
case AHARDWAREBUFFER_USAGE_CPU_READ_NEVER:
result = "AHARDWAREBUFFER_USAGE_CPU_READ_NEVER";
break;
case AHARDWAREBUFFER_USAGE_CPU_READ_RARELY:
result = "AHARDWAREBUFFER_USAGE_CPU_READ_RARELY";
break;
case AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN:
result = "AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN";
break;
case AHARDWAREBUFFER_USAGE_CPU_READ_MASK:
result = "AHARDWAREBUFFER_USAGE_CPU_READ_MASK";
break;
case AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY:
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY";
break;
case AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN:
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN";
break;
case AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK:
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK";
break;
case AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE:
result = "AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE";
break;
case AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER:
result = "AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER";
break;
case AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY:
result = "AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY";
break;
case AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT:
result = "AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT";
break;
case AHARDWAREBUFFER_USAGE_VIDEO_ENCODE:
result = "AHARDWAREBUFFER_USAGE_VIDEO_ENCODE";
break;
case AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA:
result = "AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA";
break;
case AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER:
result = "AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER";
break;
case AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP:
result = "AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP";
break;
case AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE:
result = "AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE";
break;
default: result = "Unknown flag";
}
return result;
}
std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format)
{
std::string result;
switch (format)
{
case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
result = "AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT";
break;
case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_BLOB:
result = "AHARDWAREBUFFER_FORMAT_BLOB";
break;
case AHARDWAREBUFFER_FORMAT_D16_UNORM:
result = "AHARDWAREBUFFER_FORMAT_D16_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_D24_UNORM:
result = "AHARDWAREBUFFER_FORMAT_D24_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT:
result = "AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT";
break;
case AHARDWAREBUFFER_FORMAT_D32_FLOAT:
result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT";
break;
case AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT:
result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT";
break;
case AHARDWAREBUFFER_FORMAT_S8_UINT:
result = "AHARDWAREBUFFER_FORMAT_S8_UINT";
break;
case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
result = "AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420";
break;
case AHARDWAREBUFFER_FORMAT_YCbCr_P010:
result = "AHARDWAREBUFFER_FORMAT_YCbCr_P010";
break;
case AHARDWAREBUFFER_FORMAT_YCbCr_P210:
result = "AHARDWAREBUFFER_FORMAT_YCbCr_P210";
break;
case AHARDWAREBUFFER_FORMAT_R8_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R8_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_R16_UINT:
result = "AHARDWAREBUFFER_FORMAT_R16_UINT";
break;
case AHARDWAREBUFFER_FORMAT_R16G16_UINT:
result = "AHARDWAREBUFFER_FORMAT_R16G16_UINT";
break;
case AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM";
break;
}
return result;
}

View File

@@ -0,0 +1,42 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#pragma once
#include <android/hardware_buffer.h>
#include <string>
#include <vector>
#include <numeric>
#define CHECK_AHARDWARE_BUFFER_SUPPORT(ahardwareBuffer_Desc, format) \
if (!AHardwareBuffer_isSupported(&ahardwareBuffer_Desc)) \
{ \
const std::string usage_string = \
ahardwareBufferDecodeUsageFlagsToString( \
static_cast<AHardwareBuffer_UsageFlags>( \
ahardwareBuffer_Desc.usage)); \
log_info("Unsupported format %s:\n Usage flags %s\n Size (%u, " \
"%u, layers = %u)\n", \
ahardwareBufferFormatToString(format.aHardwareBufferFormat) \
.c_str(), \
usage_string.c_str(), ahardwareBuffer_Desc.width, \
ahardwareBuffer_Desc.height, ahardwareBuffer_Desc.layers); \
continue; \
}
std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format);
std::string ahardwareBufferUsageFlagToString(AHardwareBuffer_UsageFlags flag);
std::string
ahardwareBufferDecodeUsageFlagsToString(AHardwareBuffer_UsageFlags flags);

View File

@@ -0,0 +1,23 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
int main(int argc, const char *argv[])
{
return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
test_registry::getInstance().definitions(), false, 0);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,246 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include "harness/kernelHelpers.h"
#include "harness/imageHelpers.h"
#include "harness/errorHelpers.h"
#include <android/hardware_buffer.h>
#include "debug_ahb.h"
REGISTER_TEST(test_buffer_format_negative)
{
cl_int err = CL_SUCCESS;
if (!is_extension_available(device, "cl_khr_external_memory"))
{
log_info("cl_khr_external_memory is not supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
if (!is_extension_available(
device, "cl_khr_external_memory_android_hardware_buffer"))
{
log_info("cl_khr_external_memory_android_hardware_buffer is not "
"supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
aHardwareBufferDesc.width = 64;
aHardwareBufferDesc.height = 1;
aHardwareBufferDesc.layers = 1;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
{
const std::string usage_string =
ahardwareBufferDecodeUsageFlagsToString(
static_cast<AHardwareBuffer_UsageFlags>(
aHardwareBufferDesc.usage));
log_info(
"Unsupported format %s, usage flags %s\n",
ahardwareBufferFormatToString(
static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
.c_str(),
usage_string.c_str());
return TEST_SKIPPED_ITSELF;
}
AHardwareBuffer *aHardwareBuffer = nullptr;
const int ahb_result =
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
return TEST_FAIL;
}
log_info("Testing %s\n",
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
aHardwareBufferDesc.format))
.c_str());
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
};
cl_mem buffer = clCreateBufferWithProperties(
context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
test_assert_error(err == CL_INVALID_OPERATION,
"To create a buffer the aHardwareFormat must be "
"AHARDWAREBUFFER_FORMAT_BLOB");
if (buffer != nullptr)
{
test_error(clReleaseMemObject(buffer), "Failed to release buffer");
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
return TEST_PASS;
}
REGISTER_TEST(test_buffer_size_negative)
{
cl_int err = CL_SUCCESS;
constexpr size_t buffer_size = 64;
if (!is_extension_available(device, "cl_khr_external_memory"))
{
log_info("cl_khr_external_memory is not supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
if (!is_extension_available(
device, "cl_khr_external_memory_android_hardware_buffer"))
{
log_info("cl_khr_external_memory_android_hardware_buffer is not "
"supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
aHardwareBufferDesc.width = buffer_size;
aHardwareBufferDesc.height = 1;
aHardwareBufferDesc.layers = 1;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
{
const std::string usage_string =
ahardwareBufferDecodeUsageFlagsToString(
static_cast<AHardwareBuffer_UsageFlags>(
aHardwareBufferDesc.usage));
log_info(
"Unsupported format %s, usage flags %s\n",
ahardwareBufferFormatToString(
static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
.c_str(),
usage_string.c_str());
return TEST_SKIPPED_ITSELF;
}
AHardwareBuffer *aHardwareBuffer = nullptr;
const int ahb_result =
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
return TEST_FAIL;
}
log_info("Testing %s\n",
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
aHardwareBufferDesc.format))
.c_str());
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
};
cl_mem buffer = clCreateBufferWithProperties(
context, props, CL_MEM_READ_WRITE, buffer_size / 2, nullptr, &err);
test_assert_error(err == CL_INVALID_BUFFER_SIZE,
"Wrong error value returned");
if (buffer != nullptr)
{
test_error(clReleaseMemObject(buffer), "Failed to release buffer");
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
return TEST_PASS;
}
REGISTER_TEST(test_images_negative)
{
cl_int err = CL_SUCCESS;
if (!is_extension_available(device, "cl_khr_external_memory"))
{
log_info("cl_khr_external_memory is not supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
if (!is_extension_available(
device, "cl_khr_external_memory_android_hardware_buffer"))
{
log_info("cl_khr_external_memory_android_hardware_buffer is not "
"supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
aHardwareBufferDesc.usage = static_cast<AHardwareBuffer_UsageFlags>(
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
| AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
| AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE
| AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER);
aHardwareBufferDesc.width = 64;
aHardwareBufferDesc.height = 64;
aHardwareBufferDesc.layers = 1;
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result =
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
return TEST_FAIL;
}
const cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
};
constexpr cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 };
cl_mem image =
clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE,
&image_format, nullptr, nullptr, &err);
test_assert_error(err == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
"Wrong error value returned");
if (image != nullptr)
{
test_error(clReleaseMemObject(image), "Failed to release image");
}
constexpr cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, 64, 64 };
image = clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE,
nullptr, &image_desc, nullptr, &err);
test_assert_error(err == CL_INVALID_IMAGE_DESCRIPTOR,
"Wrong error value returned");
if (image != nullptr)
{
test_error(clReleaseMemObject(image), "Failed to release image");
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
return TEST_PASS;
}

View File

@@ -388,109 +388,6 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
return TEST_PASS;
}
// Confirm that signal a semaphore with no event dependencies will not result
// in an implicit dependency on everything previously submitted
REGISTER_TEST_VERSION(external_semaphores_simple_2, Version(1, 2))
{
REQUIRE_EXTENSION("cl_khr_external_semaphore");
if (init_vulkan_device(1, &device))
{
log_info("Cannot initialise Vulkan. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
VulkanDevice vkDevice;
// Obtain pointers to semaphore's API
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
std::vector<VulkanExternalSemaphoreHandleType>
vkExternalSemaphoreHandleTypeList =
getSupportedInteropExternalSemaphoreHandleTypes(device, vkDevice);
if (vkExternalSemaphoreHandleTypeList.empty())
{
test_fail("No external semaphore handle types found\n");
}
for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
vkExternalSemaphoreHandleTypeList)
{
log_info_semaphore_type(vkExternalSemaphoreHandleType);
VulkanSemaphore vkVk2CLSemaphore(vkDevice,
vkExternalSemaphoreHandleType);
auto sema_ext = clExternalImportableSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, device);
cl_int err = CL_SUCCESS;
// Create ooo queue
clCommandQueueWrapper queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
test_error(err, "Could not create command queue");
// Create user event
clEventWrapper user_event = clCreateUserEvent(context, &err);
test_error(err, "Could not create user event");
// Create Kernel
clProgramWrapper program;
clKernelWrapper kernel;
err = create_single_kernel_helper(context, &program, &kernel, 1,
&source, "empty");
test_error(err, "Could not create kernel");
// Enqueue task_1 (dependency on user_event)
clEventWrapper task_1_event;
err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event);
test_error(err, "Could not enqueue task 1");
// Signal semaphore
clEventWrapper signal_event;
err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
nullptr, 0, nullptr, &signal_event);
test_error(err, "Could not signal semaphore");
// Wait semaphore
clEventWrapper wait_event;
err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
nullptr, 0, nullptr, &wait_event);
test_error(err, "Could not wait semaphore");
// Flush and delay
err = clFlush(queue);
test_error(err, "Could not flush queue");
cl_event event_list[] = { signal_event, wait_event };
err = clWaitForEvents(2, event_list);
test_error(err, "Could not wait on events");
// Ensure all events are completed except for task_1
test_assert_event_inprogress(task_1_event);
test_assert_event_complete(signal_event);
test_assert_event_complete(wait_event);
// Complete user_event
err = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(err, "Could not set user event to CL_COMPLETE");
// Finish
err = clFinish(queue);
test_error(err, "Could not finish queue");
// Ensure all events are completed
test_assert_event_complete(task_1_event);
test_assert_event_complete(signal_event);
test_assert_event_complete(wait_event);
}
return TEST_PASS;
}
// Confirm that a semaphore can be reused multiple times
REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
{

View File

@@ -76,87 +76,6 @@ struct SimpleSemaphore1 : public SemaphoreTestBase
}
};
struct SimpleSemaphore2 : public SemaphoreTestBase
{
SimpleSemaphore2(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int nelems)
: SemaphoreTestBase(device, context, queue, nelems)
{}
cl_int Run() override
{
cl_int err = CL_SUCCESS;
// Create ooo queue
clCommandQueueWrapper queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
test_error(err, "Could not create command queue");
// Create semaphore
cl_semaphore_properties_khr sema_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
0
};
semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
test_error(err, "Could not create semaphore");
// Create user event
clEventWrapper user_event = clCreateUserEvent(context, &err);
test_error(err, "Could not create user event");
// Create Kernel
clProgramWrapper program;
clKernelWrapper kernel;
err = create_single_kernel_helper(context, &program, &kernel, 1,
&source, "empty");
test_error(err, "Could not create kernel");
// Enqueue task_1 (dependency on user_event)
clEventWrapper task_1_event;
err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event);
test_error(err, "Could not enqueue task 1");
// Signal semaphore
clEventWrapper signal_event;
err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
nullptr, &signal_event);
test_error(err, "Could not signal semaphore");
// Wait semaphore
clEventWrapper wait_event;
err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
nullptr, &wait_event);
test_error(err, "Could not wait semaphore");
// Flush and delay
err = clFlush(queue);
test_error(err, "Could not flush queue");
std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
// Ensure all events are completed except for task_1
test_assert_event_inprogress(task_1_event);
test_assert_event_complete(signal_event);
test_assert_event_complete(wait_event);
// Complete user_event
err = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(err, "Could not set user event to CL_COMPLETE");
// Finish
err = clFinish(queue);
test_error(err, "Could not finish queue");
// Ensure all events are completed
test_assert_event_complete(task_1_event);
test_assert_event_complete(signal_event);
test_assert_event_complete(wait_event);
return CL_SUCCESS;
}
};
struct SemaphoreReuse : public SemaphoreTestBase
{
SemaphoreReuse(cl_device_id device, cl_context context,
@@ -387,14 +306,6 @@ REGISTER_TEST_VERSION(semaphores_simple_1, Version(1, 2))
num_elements);
}
// Confirm that signal a semaphore with no event dependencies will not result
// in an implicit dependency on everything previously submitted
REGISTER_TEST_VERSION(semaphores_simple_2, Version(1, 2))
{
return MakeAndRunTest<SimpleSemaphore2>(device, context, queue,
num_elements);
}
// Confirm that a semaphore can be reused multiple times
REGISTER_TEST_VERSION(semaphores_reuse, Version(1, 2))
{

View File

@@ -16,6 +16,7 @@
#include "harness/compat.h"
#include "harness/kernelHelpers.h"
#include "harness/testHarness.h"
#include "harness/parseParameters.h"
#include <string.h>

View File

@@ -58,7 +58,6 @@ uint32_t gDeviceFrequency = 0;
uint32_t gComputeDevices = 0;
size_t gMaxThreadGroupSize = 0;
size_t gWorkGroupSize = 0;
bool gWimpyMode = false;
int gWimpyReductionFactor = 512;
int gTestDouble = 0;
bool gHostReset = false;

View File

@@ -74,7 +74,6 @@ extern bool gHostReset;
// gWimpyMode indicates if we run the test in wimpy mode where we limit the
// size of 32 bit ranges to a much smaller set. This is meant to be used
// as a smoke test
extern bool gWimpyMode;
extern int gWimpyReductionFactor;
uint64_t ReadTime( void );

View File

@@ -83,13 +83,6 @@ int main (int argc, const char **argv )
if( (error = ParseArgs( argc, argv )) )
goto exit;
if (gIsEmbedded) {
vlog( "\tProfile: Embedded\n" );
}else
{
vlog( "\tProfile: Full\n" );
}
fflush( stdout );
error = runTestHarnessWithCheck(
argCount, argList, test_registry::getInstance().num_tests(),
@@ -114,6 +107,10 @@ exit:
static int ParseArgs( int argc, const char **argv )
{
if (gListTests)
{
return 0;
}
int i;
argList = (const char **)calloc(argc, sizeof(char *));
if( NULL == argList )
@@ -181,9 +178,6 @@ static int ParseArgs( int argc, const char **argv )
case 'r': gHostReset = true; break;
case 'w': // Wimpy mode
gWimpyMode = true;
break;
case '[':
parseWimpyReductionFactor( arg, gWimpyReductionFactor);
break;
@@ -202,12 +196,6 @@ static int ParseArgs( int argc, const char **argv )
}
}
if (getenv("CL_WIMPY_MODE")) {
vlog( "\n" );
vlog( "*** Detected CL_WIMPY_MODE env ***\n" );
gWimpyMode = 1;
}
PrintArch();
if( gWimpyMode )
{
@@ -217,6 +205,16 @@ static int ParseArgs( int argc, const char **argv )
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
}
if (gIsEmbedded)
{
vlog("\tProfile: Embedded\n");
}
else
{
vlog("\tProfile: Full\n");
}
return 0;
}
@@ -227,7 +225,6 @@ static void PrintUsage( void )
"supported)\n");
vlog("\t\t-t\tToggle reporting performance data.\n");
vlog("\t\t-r\tReset buffers on host instead of on device.\n");
vlog("\t\t-w\tRun in wimpy mode\n");
vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
"1-12, default factor(%u)\n",
gWimpyReductionFactor);

View File

@@ -248,7 +248,7 @@ clMemWrapper create_image(cl_context context, cl_command_queue queue,
cl_mem_flags buffer_flags = CL_MEM_READ_WRITE;
if (enable_pitch)
{
if (version.major() == 1)
if (version.get_major() == 1)
{
host_ptr = malloc(imageInfo->rowPitch);
}

Some files were not shown because too many files have changed in this diff Show More