// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #ifndef HOST_ATOMICS_H_ #define HOST_ATOMICS_H_ #include "harness/testHarness.h" #include #include "CL/cl_half.h" #ifdef WIN32 #include "Windows.h" #endif extern cl_half_rounding_mode gHalfRoundingMode; //flag for test verification (good test should discover non-atomic functions and fail) //#define NON_ATOMIC_FUNCTIONS enum TExplicitMemoryOrderType { MEMORY_ORDER_EMPTY, MEMORY_ORDER_RELAXED, MEMORY_ORDER_ACQUIRE, MEMORY_ORDER_RELEASE, MEMORY_ORDER_ACQ_REL, MEMORY_ORDER_SEQ_CST }; // Wrapper class for half-precision class HostHalf { public: // Convert from semantic values HostHalf(cl_uint value = 0) : value( cl_half_from_float(static_cast(value), gHalfRoundingMode)) {} HostHalf(int value): HostHalf(static_cast(value)) {} HostHalf(float value): value(cl_half_from_float(value, gHalfRoundingMode)) {} HostHalf(double value): HostHalf(static_cast(value)) {} // Convert to semantic values operator cl_uint() const { return static_cast(cl_half_to_float(value)); } operator float() const { return cl_half_to_float(value); } operator double() const { return static_cast(cl_half_to_float(value)); } // Construct from bit representation HostHalf(cl_half value): value(value) {} // Get the underlying bit representation operator cl_half() const { return value; } HostHalf operator-() const { return HostHalf( cl_half_from_float(-cl_half_to_float(value), gHalfRoundingMode)); } #define GENERIC_OP(RetType, op) \ RetType operator op(const HostHalf &other) const \ { \ return RetType(cl_half_to_float(value) \ op cl_half_to_float(other.value)); \ } GENERIC_OP(bool, ==) GENERIC_OP(bool, !=) GENERIC_OP(bool, <) GENERIC_OP(bool, <=) GENERIC_OP(bool, >) GENERIC_OP(bool, >=) GENERIC_OP(HostHalf, +) GENERIC_OP(HostHalf, -) GENERIC_OP(HostHalf, *) GENERIC_OP(HostHalf, /) #undef GENERIC_OP #define INPLACE_OP(op) \ HostHalf &operator op##=(const HostHalf &other) \ { \ value = cl_half_from_float(cl_half_to_float(value) \ op cl_half_to_float(other.value), \ gHalfRoundingMode); \ return *this; \ } INPLACE_OP(+) INPLACE_OP(-) INPLACE_OP(*) INPLACE_OP(/) #undef INPLACE_OP friend std::ostream &operator<<(std::ostream &os, const HostHalf &hh) { float f = cl_half_to_float(hh.value); os << f; return os; } private: cl_half value; }; namespace std { inline HostHalf abs(const HostHalf &value) { return value < HostHalf(0) ? -value : value; } } // namespace std // host atomic types (applicable for atomic functions supported on host OS) #ifdef WIN32 #define HOST_ATOMIC_INT unsigned long #define HOST_ATOMIC_UINT unsigned long #define HOST_ATOMIC_LONG unsigned long long #define HOST_ATOMIC_ULONG unsigned long long #define HOST_ATOMIC_HALF unsigned short #define HOST_ATOMIC_FLOAT float #define HOST_ATOMIC_DOUBLE double #else #define HOST_ATOMIC_INT cl_int #define HOST_ATOMIC_UINT cl_uint #define HOST_ATOMIC_LONG cl_long #define HOST_ATOMIC_ULONG cl_ulong #define HOST_ATOMIC_HALF cl_half #define HOST_ATOMIC_FLOAT cl_float #define HOST_ATOMIC_DOUBLE cl_double #endif #define HOST_ATOMIC_INTPTR_T32 HOST_ATOMIC_INT #define HOST_ATOMIC_UINTPTR_T32 HOST_ATOMIC_UINT #define HOST_ATOMIC_SIZE_T32 HOST_ATOMIC_UINT #define HOST_ATOMIC_PTRDIFF_T32 HOST_ATOMIC_INT #define HOST_ATOMIC_INTPTR_T64 HOST_ATOMIC_LONG #define HOST_ATOMIC_UINTPTR_T64 HOST_ATOMIC_ULONG #define HOST_ATOMIC_SIZE_T64 HOST_ATOMIC_ULONG #define HOST_ATOMIC_PTRDIFF_T64 HOST_ATOMIC_LONG #define HOST_ATOMIC_FLAG HOST_ATOMIC_INT // host regular types corresponding to atomic types #define HOST_INT cl_int #define HOST_UINT cl_uint #define HOST_LONG cl_long #define HOST_ULONG cl_ulong #define HOST_HALF HostHalf #define HOST_FLOAT cl_float #define HOST_DOUBLE cl_double #define HOST_INTPTR_T32 cl_int #define HOST_UINTPTR_T32 cl_uint #define HOST_SIZE_T32 cl_uint #define HOST_PTRDIFF_T32 cl_int #define HOST_INTPTR_T64 cl_long #define HOST_UINTPTR_T64 cl_ulong #define HOST_SIZE_T64 cl_ulong #define HOST_PTRDIFF_T64 cl_long #define HOST_FLAG cl_int extern cl_half_rounding_mode gHalfRoundingMode; template constexpr bool is_host_atomic_fp_v = std::disjunction_v, std::is_same, std::is_same>; template constexpr bool is_host_fp_v = std::disjunction_v, std::is_same, std::is_same>; // host atomic functions void host_atomic_thread_fence(TExplicitMemoryOrderType order); template CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { if constexpr (is_host_atomic_fp_v) { static std::mutex mx; std::lock_guard lock(mx); CorrespondingType old_value = *a; CorrespondingType new_value = old_value + c; *a = static_cast(new_value); return old_value; } else { #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) return InterlockedExchangeAdd(a, c); #elif defined(__GNUC__) return __sync_fetch_and_add(a, c); #else log_info("Host function not implemented: atomic_fetch_add\n"); return 0; #endif } } template CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { if constexpr (is_host_atomic_fp_v) { static std::mutex mx; std::lock_guard lock(mx); CorrespondingType old_value = *a; CorrespondingType new_value = old_value - c; *a = static_cast(new_value); return old_value; } else { #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) return InterlockedExchangeSubtract(a, c); #elif defined(__GNUC__) return __sync_fetch_and_sub(a, c); #else log_info("Host function not implemented: atomic_fetch_sub\n"); return 0; #endif } } template CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) if constexpr (sizeof(CorrespondingType) == 2) return InterlockedExchange16(reinterpret_cast(a), *reinterpret_cast(&c)); else return InterlockedExchange(reinterpret_cast(a), *reinterpret_cast(&c)); #elif defined(__GNUC__) return __sync_lock_test_and_set(a, *reinterpret_cast(&c)); #else log_info("Host function not implemented: atomic_exchange\n"); return 0; #endif } template <> HOST_FLOAT host_atomic_exchange(volatile HOST_ATOMIC_FLOAT *a, HOST_FLOAT c, TExplicitMemoryOrderType order); template <> HOST_DOUBLE host_atomic_exchange(volatile HOST_ATOMIC_DOUBLE *a, HOST_DOUBLE c, TExplicitMemoryOrderType order); template bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *expected, CorrespondingType desired, TExplicitMemoryOrderType order_success, TExplicitMemoryOrderType order_failure) { CorrespondingType tmp; if constexpr (is_host_atomic_fp_v) { static std::mutex mtx; std::lock_guard lock(mtx); tmp = static_cast(*a); if (tmp == *expected) { *a = static_cast(desired); return true; } *expected = tmp; } else { #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) tmp = InterlockedCompareExchange(a, desired, *expected); #elif defined(__GNUC__) tmp = __sync_val_compare_and_swap(a, *expected, desired); #else log_info("Host function not implemented: atomic_compare_exchange\n"); tmp = 0; #endif if (tmp == *expected) return true; *expected = tmp; } return false; } template CorrespondingType host_atomic_load(volatile AtomicType *a, TExplicitMemoryOrderType order) { #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) if constexpr (sizeof(CorrespondingType) == 2) return InterlockedOr16(reinterpret_cast(a), 0); else return InterlockedExchangeAdd(reinterpret_cast(a), 0); #elif defined(__GNUC__) return __sync_add_and_fetch(a, 0); #else log_info("Host function not implemented: atomic_load\n"); return 0; #endif } template <> HOST_FLOAT host_atomic_load(volatile HOST_ATOMIC_FLOAT *a, TExplicitMemoryOrderType order); template <> HOST_DOUBLE host_atomic_load(volatile HOST_ATOMIC_DOUBLE *a, TExplicitMemoryOrderType order); template void host_atomic_store(volatile AtomicType* a, CorrespondingType c, TExplicitMemoryOrderType order) { host_atomic_exchange(a, c, order); } template void host_atomic_init(volatile AtomicType* a, CorrespondingType c) { host_atomic_exchange(a, c, MEMORY_ORDER_RELAXED); } template CorrespondingType host_atomic_fetch_or(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { CorrespondingType expected = host_atomic_load(a, order); CorrespondingType desired; do desired = expected | c; while(!host_atomic_compare_exchange(a, &expected, desired, order, order)); return expected; } template CorrespondingType host_atomic_fetch_and(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { CorrespondingType expected = host_atomic_load(a, order); CorrespondingType desired; do desired = expected & c; while(!host_atomic_compare_exchange(a, &expected, desired, order, order)); return expected; } template CorrespondingType host_atomic_fetch_xor(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { CorrespondingType expected = host_atomic_load(a, order); CorrespondingType desired; do desired = expected ^ c; while(!host_atomic_compare_exchange(a, &expected, desired, order, order)); return expected; } template CorrespondingType host_atomic_fetch_min(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { CorrespondingType expected = host_atomic_load(a, order); CorrespondingType desired; do desired = expected < c ? expected : c; while(!host_atomic_compare_exchange(a, &expected, desired, order, order)); return expected; } template CorrespondingType host_atomic_fetch_max(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { CorrespondingType expected = host_atomic_load(a, order); CorrespondingType desired; do desired = expected > c ? expected : c; while(!host_atomic_compare_exchange(a, &expected, desired, order, order)); return expected; } bool host_atomic_flag_test_and_set(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order); void host_atomic_flag_clear(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order); #endif // HOST_ATOMICS_H_