Added support for cl_ext_float_atomics in c11_atomics store test along with atomic_half type (#2293)

Related to #2142, according to the work plan extended `CBasicTestStore`
with support for `atomic_half`.

Optimization remark: in tests related to `CBasicTestStore` kernel source
code is mostly composed with arguments following similar pattern:

`__kernel void test_atomic_kernel(uint threadCount, uint numDestItems,
__global int *finalDest, __global int *oldValues, volatile __local
atomic_int *destMemory)`

`oldValues` buffer is initialized with a host pointer, after kernel
execution it is read back to the host pointer but it is unused in
neither of the kernels I verified.
This commit is contained in:
Marcin Hajder
2025-05-27 17:51:36 +02:00
committed by GitHub
parent 110799bb67
commit dec5644112
5 changed files with 134 additions and 54 deletions

View File

@@ -58,30 +58,28 @@ cl_uint AtomicTypeInfo::Size(cl_device_id device)
{
switch(_type)
{
case TYPE_ATOMIC_INT:
case TYPE_ATOMIC_UINT:
case TYPE_ATOMIC_FLOAT:
case TYPE_ATOMIC_FLAG:
return sizeof(cl_int);
case TYPE_ATOMIC_LONG:
case TYPE_ATOMIC_ULONG:
case TYPE_ATOMIC_DOUBLE:
return sizeof(cl_long);
case TYPE_ATOMIC_INTPTR_T:
case TYPE_ATOMIC_UINTPTR_T:
case TYPE_ATOMIC_SIZE_T:
case TYPE_ATOMIC_PTRDIFF_T:
{
int error;
cl_uint addressBits = 0;
case TYPE_ATOMIC_HALF: return sizeof(cl_half);
case TYPE_ATOMIC_INT:
case TYPE_ATOMIC_UINT:
case TYPE_ATOMIC_FLOAT:
case TYPE_ATOMIC_FLAG: return sizeof(cl_int);
case TYPE_ATOMIC_LONG:
case TYPE_ATOMIC_ULONG:
case TYPE_ATOMIC_DOUBLE: return sizeof(cl_long);
case TYPE_ATOMIC_INTPTR_T:
case TYPE_ATOMIC_UINTPTR_T:
case TYPE_ATOMIC_SIZE_T:
case TYPE_ATOMIC_PTRDIFF_T: {
int error;
cl_uint addressBits = 0;
error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addressBits), &addressBits, 0);
test_error_ret(error, "clGetDeviceInfo", 0);
error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
sizeof(addressBits), &addressBits, 0);
test_error_ret(error, "clGetDeviceInfo", 0);
return addressBits/8;
}
default:
return 0;
return addressBits / 8;
}
default: return 0;
}
}
@@ -93,6 +91,7 @@ const char *AtomicTypeInfo::AtomicTypeName()
return "atomic_int";
case TYPE_ATOMIC_UINT:
return "atomic_uint";
case TYPE_ATOMIC_HALF: return "atomic_half";
case TYPE_ATOMIC_FLOAT:
return "atomic_float";
case TYPE_ATOMIC_FLAG:
@@ -124,6 +123,7 @@ const char *AtomicTypeInfo::RegularTypeName()
return "int";
case TYPE_ATOMIC_UINT:
return "uint";
case TYPE_ATOMIC_HALF: return "half";
case TYPE_ATOMIC_FLOAT:
return "float";
case TYPE_ATOMIC_FLAG:
@@ -163,29 +163,30 @@ int AtomicTypeInfo::IsSupported(cl_device_id device)
{
switch(_type)
{
case TYPE_ATOMIC_INT:
case TYPE_ATOMIC_UINT:
case TYPE_ATOMIC_FLOAT:
case TYPE_ATOMIC_FLAG:
return 1;
case TYPE_ATOMIC_LONG:
case TYPE_ATOMIC_ULONG:
return is_extension_available(device, "cl_khr_int64_base_atomics") &&
is_extension_available(device, "cl_khr_int64_extended_atomics");
case TYPE_ATOMIC_DOUBLE:
return is_extension_available(device, "cl_khr_int64_base_atomics") &&
is_extension_available(device, "cl_khr_int64_extended_atomics") &&
is_extension_available(device, "cl_khr_fp64");
case TYPE_ATOMIC_INTPTR_T:
case TYPE_ATOMIC_UINTPTR_T:
case TYPE_ATOMIC_SIZE_T:
case TYPE_ATOMIC_PTRDIFF_T:
if(Size(device) == 4)
return 1;
return is_extension_available(device, "cl_khr_int64_base_atomics") &&
is_extension_available(device, "cl_khr_int64_extended_atomics");
default:
return 0;
case TYPE_ATOMIC_HALF:
return is_extension_available(device, "cl_khr_fp16");
case TYPE_ATOMIC_INT:
case TYPE_ATOMIC_UINT:
case TYPE_ATOMIC_FLOAT:
case TYPE_ATOMIC_FLAG: return 1;
case TYPE_ATOMIC_LONG:
case TYPE_ATOMIC_ULONG:
return is_extension_available(device, "cl_khr_int64_base_atomics")
&& is_extension_available(device,
"cl_khr_int64_extended_atomics");
case TYPE_ATOMIC_DOUBLE:
return is_extension_available(device, "cl_khr_int64_base_atomics")
&& is_extension_available(device, "cl_khr_int64_extended_atomics")
&& is_extension_available(device, "cl_khr_fp64");
case TYPE_ATOMIC_INTPTR_T:
case TYPE_ATOMIC_UINTPTR_T:
case TYPE_ATOMIC_SIZE_T:
case TYPE_ATOMIC_PTRDIFF_T:
if (Size(device) == 4) return 1;
return is_extension_available(device, "cl_khr_int64_base_atomics")
&& is_extension_available(device,
"cl_khr_int64_extended_atomics");
default: return 0;
}
}

View File

@@ -22,6 +22,8 @@
#include "host_atomics.h"
#include "CL/cl_half.h"
#include <vector>
#include <sstream>
@@ -38,6 +40,7 @@ enum TExplicitAtomicType
TYPE_ATOMIC_UINT,
TYPE_ATOMIC_LONG,
TYPE_ATOMIC_ULONG,
TYPE_ATOMIC_HALF,
TYPE_ATOMIC_FLOAT,
TYPE_ATOMIC_DOUBLE,
TYPE_ATOMIC_INTPTR_T,
@@ -71,6 +74,9 @@ extern int
gMaxDeviceThreads; // maximum number of threads executed on OCL device
extern cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device
extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
extern const char *
get_memory_order_type_name(TExplicitMemoryOrderType orderType);
@@ -240,13 +246,13 @@ public:
int error = 0;
if (_maxDeviceThreads > 0 && !UseSVM())
{
LocalMemory(true);
SetLocalMemory(true);
EXECUTE_TEST(
error, ExecuteForEachDeclarationType(deviceID, context, queue));
}
if (_maxDeviceThreads + MaxHostThreads() > 0)
{
LocalMemory(false);
SetLocalMemory(false);
EXECUTE_TEST(
error, ExecuteForEachDeclarationType(deviceID, context, queue));
}
@@ -401,7 +407,7 @@ public:
bool UseSVM() { return _useSVM; }
void StartValue(HostDataType startValue) { _startValue = startValue; }
HostDataType StartValue() { return _startValue; }
void LocalMemory(bool local) { _localMemory = local; }
void SetLocalMemory(bool local) { _localMemory = local; }
bool LocalMemory() { return _localMemory; }
void DeclaredInProgram(bool declaredInProgram)
{
@@ -781,6 +787,8 @@ CBasicTest<HostAtomicType, HostDataType>::PragmaHeader(cl_device_id deviceID)
}
if (_dataType == TYPE_ATOMIC_DOUBLE)
pragma += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
if (_dataType == TYPE_ATOMIC_HALF)
pragma += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
return pragma;
}

View File

@@ -41,6 +41,7 @@ enum TExplicitMemoryOrderType
#define HOST_ATOMIC_UINT unsigned long
#define HOST_ATOMIC_LONG unsigned long long
#define HOST_ATOMIC_ULONG unsigned long long
#define HOST_ATOMIC_HALF unsigned short
#define HOST_ATOMIC_FLOAT float
#define HOST_ATOMIC_DOUBLE double
#else
@@ -48,6 +49,7 @@ enum TExplicitMemoryOrderType
#define HOST_ATOMIC_UINT cl_uint
#define HOST_ATOMIC_LONG cl_long
#define HOST_ATOMIC_ULONG cl_ulong
#define HOST_ATOMIC_HALF cl_half
#define HOST_ATOMIC_FLOAT cl_float
#define HOST_ATOMIC_DOUBLE cl_double
#endif
@@ -69,6 +71,7 @@ enum TExplicitMemoryOrderType
#define HOST_UINT cl_uint
#define HOST_LONG cl_long
#define HOST_ULONG cl_ulong
#define HOST_HALF cl_half
#define HOST_FLOAT cl_float
#define HOST_DOUBLE cl_double
@@ -120,9 +123,12 @@ CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType
TExplicitMemoryOrderType order)
{
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
return InterlockedExchange(a, c);
if (sizeof(CorrespondingType) == 2)
return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a), c);
else
return InterlockedExchange(reinterpret_cast<volatile LONG *>(a), c);
#elif defined(__GNUC__)
return __sync_lock_test_and_set(a, c);
return __sync_lock_test_and_set(a, c);
#else
log_info("Host function not implemented: atomic_exchange\n");
return 0;

View File

@@ -1,5 +1,5 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
// Copyright (c) 2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -14,8 +14,11 @@
// limitations under the License.
//
#include "harness/testHarness.h"
#include "harness/deviceInfo.h"
#include "harness/kernelHelpers.h"
#include <iostream>
#include <string>
#include "CL/cl_half.h"
bool gHost = false; // flag for testing native host threads (test verification)
bool gOldAPI = false; // flag for testing with old API (OpenCL 1.2) - test verification
@@ -28,6 +31,9 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio
int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
bool gFloatAtomicsSupported = false;
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device);
@@ -123,6 +129,34 @@ test_status InitCL(cl_device_id device) {
| CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES;
}
if (is_extension_available(device, "cl_ext_float_atomics"))
{
gFloatAtomicsSupported = true;
if (is_extension_available(device, "cl_khr_fp16"))
{
cl_int error = clGetDeviceInfo(
device, CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gHalfAtomicCaps), &gHalfAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
const cl_device_fp_config fpConfigHalf =
get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);
if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0)
{
gHalfRoundingMode = CL_HALF_RTE;
}
else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0)
{
gHalfRoundingMode = CL_HALF_RTZ;
}
else
{
log_error("Error while acquiring half rounding mode\n");
return TEST_FAIL;
}
}
}
return TEST_PASS;
}

View File

@@ -1,5 +1,5 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
// Copyright (c) 2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -33,6 +33,7 @@ public:
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestStore(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
@@ -54,6 +55,21 @@ public:
== TEST_SKIPPED_ITSELF)
return 0; // skip test - not applicable
if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
._type
== TYPE_ATOMIC_HALF)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT)
== 0)
return 0;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
@@ -75,7 +91,13 @@ public:
HostDataType *startRefValues,
cl_uint whichDestValue)
{
expected = (HostDataType)whichDestValue;
if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
._type
!= TYPE_ATOMIC_HALF)
expected = (HostDataType)whichDestValue;
else
expected = cl_half_from_float(static_cast<float>(whichDestValue),
gHalfRoundingMode);
return true;
}
};
@@ -109,6 +131,15 @@ static int test_atomic_store_generic(cl_device_id deviceID, cl_context context,
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(error,
test_double.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestStore<HOST_ATOMIC_HALF, HOST_HALF> test_half(TYPE_ATOMIC_HALF,
useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(