diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 1fca36b8..d321819f 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -24,8 +24,9 @@ #include "CL/cl_half.h" -#include +#include #include +#include #define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads) #define MAX_HOST_THREADS GetThreadCount() @@ -892,14 +893,15 @@ CBasicTest::ProgramHeader(cl_uint maxNumDestItems) header += std::string("__global volatile ") + aTypeName + " destMemory[" + ss.str() + "] = {\n"; ss.str(""); - if (CBasicTest::DataType()._type - != TYPE_ATOMIC_HALF) - ss << _startValue; - else + == TYPE_ATOMIC_FLOAT) + /* TYPE_ATOMIC_FLOAT start values are streamed with setprecision(10); presumably so the generated initializer text keeps enough digits - confirm */ ss << std::setprecision(10) << _startValue; + else if (CBasicTest::DataType()._type + == TYPE_ATOMIC_HALF) ss << static_cast( cl_half_to_float(static_cast(_startValue))); - + else + ss << _startValue; for (cl_uint i = 0; i < maxNumDestItems; i++) { if (aTypeName == "atomic_flag") diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index e1787849..d9482fb7 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -172,19 +172,34 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp TExplicitMemoryOrderType order_success, TExplicitMemoryOrderType order_failure) { - CorrespondingType tmp; -#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - tmp = InterlockedCompareExchange(a, desired, *expected); + CorrespondingType tmp; + if constexpr (std::is_same_v) + { + /* this branch holds a function-local static mutex while it reads *a, compares it with *expected and conditionally stores desired; the else branch uses the compiler CAS intrinsics instead */ static std::mutex mtx; + std::lock_guard lock(mtx); + tmp = *reinterpret_cast(a); + if (tmp == *expected) + { + *reinterpret_cast(a) = desired; + return true; + } + /* on mismatch the value that was read is handed back through *expected */ *expected = tmp; + } + else + { +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + + tmp = InterlockedCompareExchange(a, desired, *expected); #elif defined(__GNUC__) - tmp = 
__sync_val_compare_and_swap(a, *expected, desired); + tmp = __sync_val_compare_and_swap(a, *expected, desired); #else - log_info("Host function not implemented: atomic_compare_exchange\n"); - tmp = 0; + log_info("Host function not implemented: atomic_compare_exchange\n"); + tmp = 0; #endif - if(tmp == *expected) - return true; - *expected = tmp; - return false; + if (tmp == *expected) return true; + *expected = tmp; + } + return false; } template diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index f089d6da..485445f7 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -138,6 +138,7 @@ test_status InitCL(cl_device_id device) { device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr); test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); + if (is_extension_available(device, "cl_khr_fp16")) { cl_int error = clGetDeviceInfo( diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 73599dc1..b51f4461 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -2608,54 +2608,178 @@ REGISTER_TEST(svm_atomic_fetch_xor2) template class CBasicTestFetchMin : public CBasicTestMemOrderScope { + /* min_range/max_range bound the get_random_float() call in GenerateRefs; the constructor sets them to -999.0 and 999.0 */ double min_range; + double max_range; + public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) + useSVM), + min_range(-999.0), max_range(999.0) { StartValue(DataType().MaxValue()); + /* for the type selected by this is_same_v test the base-class OldValueCheck is switched off; the template arguments are not visible in this extract - presumably the float host type, TODO confirm */ if constexpr (std::is_same_v) + { + CBasicTestMemOrderScope::OldValueCheck(false); + } } - virtual std::string ProgramCore() + std::string ProgramCore() override { 
std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return " oldValues[tid] = atomic_fetch_min" + postfix - + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], - MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, - MTdata d) - { - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - startRefValues[i] = genrand_int32(d); - if (sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + /* emits two kernel statements: fetch_min of oldValues[tid] into destMemory[0] with the result dropped, then fetch_min of 0 into destMemory[tid] whose returned value is stored in oldValues[tid] */ return " atomic_fetch_min" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_min" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_min" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + + ");\n"; + } + } + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override + { + if constexpr (std::is_same_v) + { + /* host-side counterpart of the two-statement kernel text built in ProgramCore: same two fetch_min calls in the same order */ host_atomic_fetch_min(&destMemory[0], oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_min( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = host_atomic_fetch_min( + &destMemory[0], oldValues[tid], MemoryOrder()); + } + } + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + /* this branch fills every input from get_random_float(min_range, max_range, d); the other branch keeps the integer generator */ for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = get_random_float(min_range, max_range, d); + } + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if 
(sizeof(HostDataType) >= 8) + { + cl_ulong v = startRefValues[i]; + v |= (cl_ulong)genrand_int32(d) << 16; + startRefValues[i] = v; + } + } } return true; } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - if (startRefValues[i] < expected) expected = startRefValues[i]; + if (whichDestValue == 0) + for (cl_uint i = 0; i < threadCount; i++) + if (startRefValues[i] < expected) + expected = startRefValues[i]; + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] < expected) expected = startRefValues[i]; + } } return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if (std::is_same::value) + { + if (whichDestValue == 0) + return CBasicTestMemOrderScope:: + IsTestNotAsExpected(expected, testValues, whichDestValue); + return false; // ignore all but 0 which stores final result + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + for (cl_uint i = 1; i < threadCount; i++) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return !correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr 
(std::is_same_v) + { + /* returns 0 without running the base-class test when the LOCAL (for LocalMemory()) or GLOBAL (otherwise) FP min/max bit is not set in gFloatAtomicCaps */ if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + /* the type selected by this test gets threadCount results; every other type uses the base-class count */ if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_min_generic(cl_device_id deviceID, @@ -2680,6 +2804,15 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + /* the TYPE_ATOMIC_FLOAT instance is only built and executed when gFloatAtomicsSupported is set */ if (gFloatAtomicsSupported) + { + CBasicTestFetchMin test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, test_float.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchMin @@ -2742,54 +2875,178 @@ REGISTER_TEST(svm_atomic_fetch_min) template class CBasicTestFetchMax : public CBasicTestMemOrderScope { + /* min_range/max_range bound the get_random_float() call in GenerateRefs; the constructor sets them to -999.0 and 999.0 */ double min_range; + double max_range; + public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) + useSVM), + min_range(-999.0), max_range(999.0) { StartValue(DataType().MinValue()); + /* for the type selected by this is_same_v test the base-class OldValueCheck is switched off; template arguments are not visible in this extract - presumably the float host type, TODO confirm */ if constexpr (std::is_same_v) + { + CBasicTestMemOrderScope::OldValueCheck(false); + } } - virtual std::string ProgramCore() + std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() 
? "" : "_explicit"); - return " oldValues[tid] = atomic_fetch_max" + postfix - + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], - MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, - MTdata d) - { - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - startRefValues[i] = genrand_int32(d); - if (sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + /* emits two kernel statements: fetch_max of oldValues[tid] into destMemory[0] with the result dropped, then fetch_max of 0 into destMemory[tid] whose returned value is stored in oldValues[tid] */ return " atomic_fetch_max" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_max" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_max" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + + ");\n"; + } + } + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override + { + if constexpr (std::is_same_v) + { + /* host-side counterpart of the two-statement kernel text built in ProgramCore: same two fetch_max calls in the same order */ host_atomic_fetch_max(&destMemory[0], oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_max( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = host_atomic_fetch_max( + &destMemory[0], oldValues[tid], MemoryOrder()); + } + } + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + /* this branch fills every input from get_random_float(min_range, max_range, d); the other branch keeps the integer generator */ for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = get_random_float(min_range, max_range, d); + } + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + { + cl_ulong v = startRefValues[i]; + v |= 
(cl_ulong)genrand_int32(d) << 16; + startRefValues[i] = v; + } + } } return true; } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for (cl_uint i = 0; i < threadCount; i++) + if constexpr (std::is_same_v) { - if (startRefValues[i] > expected) expected = startRefValues[i]; + /* only destination 0 accumulates the maximum of the inputs; for any other destination expected stays StartValue() */ if (whichDestValue == 0) + for (cl_uint i = 0; i < threadCount; i++) + if (startRefValues[i] > expected) + expected = startRefValues[i]; + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] > expected) expected = startRefValues[i]; + } } return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if (std::is_same::value) + { + if (whichDestValue == 0) + return CBasicTestMemOrderScope:: + IsTestNotAsExpected(expected, testValues, whichDestValue); + return false; // ignore all but 0 which stores final result + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + /* VerifyRefs: in the first branch, entries 1..threadCount-1 of refValues are compared with StartValue(); every differing entry is logged and clears correct, and the function returns !correct. Otherwise the base-class VerifyRefs decides. NOTE(review): refValues presumably holds the per-thread oldValues produced by HostFunction or the kernel - confirm against the base class. */ bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + if (refValues[i] != StartValue()) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return !correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & 
CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; // skip test - not applicable + + /* same early return of 0 for the non-local case, keyed on the GLOBAL FP min/max bit of gFloatAtomicCaps */ if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + /* the type selected by this test gets threadCount results; every other type uses the base-class count */ if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_max_generic(cl_device_id deviceID, @@ -2814,6 +3071,15 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + /* the TYPE_ATOMIC_FLOAT instance is only built and executed when gFloatAtomicsSupported is set */ if (gFloatAtomicsSupported) + { + CBasicTestFetchMax test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, test_float.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchMax