diff --git a/test_conformance/c11_atomics/common.cpp b/test_conformance/c11_atomics/common.cpp index 668d7b50..414d877b 100644 --- a/test_conformance/c11_atomics/common.cpp +++ b/test_conformance/c11_atomics/common.cpp @@ -58,30 +58,28 @@ cl_uint AtomicTypeInfo::Size(cl_device_id device) { switch(_type) { - case TYPE_ATOMIC_INT: - case TYPE_ATOMIC_UINT: - case TYPE_ATOMIC_FLOAT: - case TYPE_ATOMIC_FLAG: - return sizeof(cl_int); - case TYPE_ATOMIC_LONG: - case TYPE_ATOMIC_ULONG: - case TYPE_ATOMIC_DOUBLE: - return sizeof(cl_long); - case TYPE_ATOMIC_INTPTR_T: - case TYPE_ATOMIC_UINTPTR_T: - case TYPE_ATOMIC_SIZE_T: - case TYPE_ATOMIC_PTRDIFF_T: - { - int error; - cl_uint addressBits = 0; + case TYPE_ATOMIC_HALF: return sizeof(cl_half); + case TYPE_ATOMIC_INT: + case TYPE_ATOMIC_UINT: + case TYPE_ATOMIC_FLOAT: + case TYPE_ATOMIC_FLAG: return sizeof(cl_int); + case TYPE_ATOMIC_LONG: + case TYPE_ATOMIC_ULONG: + case TYPE_ATOMIC_DOUBLE: return sizeof(cl_long); + case TYPE_ATOMIC_INTPTR_T: + case TYPE_ATOMIC_UINTPTR_T: + case TYPE_ATOMIC_SIZE_T: + case TYPE_ATOMIC_PTRDIFF_T: { + int error; + cl_uint addressBits = 0; - error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addressBits), &addressBits, 0); - test_error_ret(error, "clGetDeviceInfo", 0); + error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, + sizeof(addressBits), &addressBits, 0); + test_error_ret(error, "clGetDeviceInfo", 0); - return addressBits/8; - } - default: - return 0; + return addressBits / 8; + } + default: return 0; } } @@ -93,6 +91,7 @@ const char *AtomicTypeInfo::AtomicTypeName() return "atomic_int"; case TYPE_ATOMIC_UINT: return "atomic_uint"; + case TYPE_ATOMIC_HALF: return "atomic_half"; case TYPE_ATOMIC_FLOAT: return "atomic_float"; case TYPE_ATOMIC_FLAG: @@ -124,6 +123,7 @@ const char *AtomicTypeInfo::RegularTypeName() return "int"; case TYPE_ATOMIC_UINT: return "uint"; + case TYPE_ATOMIC_HALF: return "half"; case TYPE_ATOMIC_FLOAT: return "float"; case TYPE_ATOMIC_FLAG: @@ -163,29 +163,30 @@ int AtomicTypeInfo::IsSupported(cl_device_id device) { switch(_type) { - case TYPE_ATOMIC_INT: - case TYPE_ATOMIC_UINT: - case TYPE_ATOMIC_FLOAT: - case TYPE_ATOMIC_FLAG: - return 1; - case TYPE_ATOMIC_LONG: - case TYPE_ATOMIC_ULONG: - return is_extension_available(device, "cl_khr_int64_base_atomics") && - is_extension_available(device, "cl_khr_int64_extended_atomics"); - case TYPE_ATOMIC_DOUBLE: - return is_extension_available(device, "cl_khr_int64_base_atomics") && - is_extension_available(device, "cl_khr_int64_extended_atomics") && - is_extension_available(device, "cl_khr_fp64"); - case TYPE_ATOMIC_INTPTR_T: - case TYPE_ATOMIC_UINTPTR_T: - case TYPE_ATOMIC_SIZE_T: - case TYPE_ATOMIC_PTRDIFF_T: - if(Size(device) == 4) - return 1; - return is_extension_available(device, "cl_khr_int64_base_atomics") && - is_extension_available(device, "cl_khr_int64_extended_atomics"); - default: - return 0; + case TYPE_ATOMIC_HALF: + return is_extension_available(device, "cl_khr_fp16"); + case TYPE_ATOMIC_INT: + case TYPE_ATOMIC_UINT: + case TYPE_ATOMIC_FLOAT: + case TYPE_ATOMIC_FLAG: return 1; + case TYPE_ATOMIC_LONG: + case TYPE_ATOMIC_ULONG: + return is_extension_available(device, "cl_khr_int64_base_atomics") + && is_extension_available(device, + "cl_khr_int64_extended_atomics"); + case TYPE_ATOMIC_DOUBLE: + return is_extension_available(device, "cl_khr_int64_base_atomics") + && is_extension_available(device, "cl_khr_int64_extended_atomics") + && is_extension_available(device, "cl_khr_fp64"); + case TYPE_ATOMIC_INTPTR_T: + case TYPE_ATOMIC_UINTPTR_T: + case TYPE_ATOMIC_SIZE_T: + case TYPE_ATOMIC_PTRDIFF_T: + if (Size(device) == 4) return 1; + return is_extension_available(device, "cl_khr_int64_base_atomics") + && is_extension_available(device, + "cl_khr_int64_extended_atomics"); + default: return 0; } } diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 76d1fe27..64e59b54 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -22,6 +22,8 @@ #include "host_atomics.h" +#include "CL/cl_half.h" + #include #include @@ -38,6 +40,7 @@ enum TExplicitAtomicType TYPE_ATOMIC_UINT, TYPE_ATOMIC_LONG, TYPE_ATOMIC_ULONG, + TYPE_ATOMIC_HALF, TYPE_ATOMIC_FLOAT, TYPE_ATOMIC_DOUBLE, TYPE_ATOMIC_INTPTR_T, @@ -71,6 +74,9 @@ extern int gMaxDeviceThreads; // maximum number of threads executed on OCL device extern cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device +extern cl_half_rounding_mode gHalfRoundingMode; +extern bool gFloatAtomicsSupported; +extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps; extern const char * get_memory_order_type_name(TExplicitMemoryOrderType orderType); @@ -240,13 +246,13 @@ public: int error = 0; if (_maxDeviceThreads > 0 && !UseSVM()) { - LocalMemory(true); + SetLocalMemory(true); EXECUTE_TEST( error, ExecuteForEachDeclarationType(deviceID, context, queue)); } if (_maxDeviceThreads + MaxHostThreads() > 0) { - LocalMemory(false); + SetLocalMemory(false); EXECUTE_TEST( error, ExecuteForEachDeclarationType(deviceID, context, queue)); } @@ -401,7 +407,7 @@ public: bool UseSVM() { return _useSVM; } void StartValue(HostDataType startValue) { _startValue = startValue; } HostDataType StartValue() { return _startValue; } - void LocalMemory(bool local) { _localMemory = local; } + void SetLocalMemory(bool local) { _localMemory = local; } bool LocalMemory() { return _localMemory; } void DeclaredInProgram(bool declaredInProgram) { @@ -781,6 +787,8 @@ CBasicTest::PragmaHeader(cl_device_id deviceID) } if (_dataType == TYPE_ATOMIC_DOUBLE) pragma += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + if (_dataType == TYPE_ATOMIC_HALF) + pragma += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; return pragma; } diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index efa36ad3..017bf25a 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -41,6 +41,7 @@ enum TExplicitMemoryOrderType #define HOST_ATOMIC_UINT unsigned long #define HOST_ATOMIC_LONG unsigned long long #define HOST_ATOMIC_ULONG unsigned long long +#define HOST_ATOMIC_HALF unsigned short #define HOST_ATOMIC_FLOAT float #define HOST_ATOMIC_DOUBLE double #else @@ -48,6 +49,7 @@ enum TExplicitMemoryOrderType #define HOST_ATOMIC_UINT cl_uint #define HOST_ATOMIC_LONG cl_long #define HOST_ATOMIC_ULONG cl_ulong +#define HOST_ATOMIC_HALF cl_half #define HOST_ATOMIC_FLOAT cl_float #define HOST_ATOMIC_DOUBLE cl_double #endif @@ -69,6 +71,7 @@ enum TExplicitMemoryOrderType #define HOST_UINT cl_uint #define HOST_LONG cl_long #define HOST_ULONG cl_ulong +#define HOST_HALF cl_half #define HOST_FLOAT cl_float #define HOST_DOUBLE cl_double @@ -120,9 +123,12 @@ CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType TExplicitMemoryOrderType order) { #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - return InterlockedExchange(a, c); + if (sizeof(CorrespondingType) == 2) + return InterlockedExchange16(reinterpret_cast(a), c); + else + return InterlockedExchange(reinterpret_cast(a), c); #elif defined(__GNUC__) - return __sync_lock_test_and_set(a, c); + return __sync_lock_test_and_set(a, c); #else log_info("Host function not implemented: atomic_exchange\n"); return 0; diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index 4b37f745..40972b26 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2024 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -14,8 +14,11 @@ // limitations under the License. // #include "harness/testHarness.h" +#include "harness/deviceInfo.h" +#include "harness/kernelHelpers.h" #include #include +#include "CL/cl_half.h" bool gHost = false; // flag for testing native host threads (test verification) bool gOldAPI = false; // flag for testing with old API (OpenCL 1.2) - test verification @@ -28,6 +31,9 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device +cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; +bool gFloatAtomicsSupported = false; +cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); @@ -123,6 +129,34 @@ test_status InitCL(cl_device_id device) { | CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES; } + if (is_extension_available(device, "cl_ext_float_atomics")) + { + gFloatAtomicsSupported = true; + if (is_extension_available(device, "cl_khr_fp16")) + { + cl_int error = clGetDeviceInfo( + device, CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT, + sizeof(gHalfAtomicCaps), &gHalfAtomicCaps, nullptr); + test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); + + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + gHalfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + gHalfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode\n"); + return TEST_FAIL; + } + } + } + return TEST_PASS; } diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 95e0c13f..649d8aa4 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2024 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -33,6 +33,7 @@ public: using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTest::CheckCapabilities; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) @@ -54,6 +55,21 @@ public: == TEST_SKIPPED_ITSELF) return 0; // skip test - not applicable + if (CBasicTestMemOrderScope::DataType() + ._type + == TYPE_ATOMIC_HALF) + { + if (LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT) + == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT) + == 0) + return 0; + } + return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); @@ -75,7 +91,13 @@ public: HostDataType *startRefValues, cl_uint whichDestValue) { - expected = (HostDataType)whichDestValue; + if (CBasicTestMemOrderScope::DataType() + ._type + != TYPE_ATOMIC_HALF) + expected = (HostDataType)whichDestValue; + else + expected = cl_half_from_float(static_cast(whichDestValue), + gHalfRoundingMode); return true; } }; @@ -109,6 +131,15 @@ static int test_atomic_store_generic(cl_device_id deviceID, cl_context context, TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); + + if (gFloatAtomicsSupported) + { + CBasicTestStore test_half(TYPE_ATOMIC_HALF, + useSVM); + EXECUTE_TEST(error, + test_half.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestStore test_intptr_t(