mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Support building for Windows on 64-bit Arm (#2355)
Add support for building for Windows on Arm.
This commit is contained in:
committed by
GitHub
parent
4115d04ae0
commit
aef863afa2
@@ -436,7 +436,14 @@ void *ThreadPool_WorkerFunc(void *p)
|
||||
|
||||
// drop run count to 0
|
||||
gRunCount = 0;
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
_mm_mfence();
|
||||
#elif defined(_M_ARM64)
|
||||
// Full inner-shareable barrier (loads and stores), matching the full fence
// (_mm_mfence) used on the x86 branch; ISHST would order stores only.
__dmb(_ARM64_BARRIER_ISH);
|
||||
#else
|
||||
#error Architecture needs an implementation
|
||||
#endif
|
||||
|
||||
#else
|
||||
if (pthread_mutex_lock(&gAtomicLock))
|
||||
log_error(
|
||||
@@ -703,7 +710,13 @@ void ThreadPool_Exit(void)
|
||||
// http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||
__sync_synchronize();
|
||||
#elif defined(_MSC_VER)
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
_mm_mfence();
|
||||
#elif defined(_M_ARM64)
|
||||
// Full inner-shareable barrier (loads and stores), matching the full fences
// used on the other branches (__sync_synchronize / _mm_mfence); ISHST would
// order stores only.
__dmb(_ARM64_BARRIER_ISH);
|
||||
#else
|
||||
#error Architecture needs an implementation
|
||||
#endif
|
||||
#else
|
||||
#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
|
||||
#endif
|
||||
|
||||
@@ -23,10 +23,10 @@
|
||||
|
||||
#include <CL/cl_half.h>
|
||||
|
||||
#if defined(__SSE__) || defined(_MSC_VER)
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE2__) || defined(_MSC_VER)
|
||||
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
@@ -110,7 +110,7 @@ static long lrintf_clamped(float f)
|
||||
volatile float x = f;
|
||||
float magicVal = magic[f < 0];
|
||||
|
||||
#if defined(__SSE__) || defined(_WIN32)
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
||||
// incorrectly
|
||||
__m128 v = _mm_set_ss(x);
|
||||
@@ -150,7 +150,7 @@ static long lrint_clamped(double f)
|
||||
{
|
||||
volatile double x = f;
|
||||
double magicVal = magic[f < 0];
|
||||
#if defined(__SSE2__) || (defined(_MSC_VER))
|
||||
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
||||
// incorrectly
|
||||
__m128d v = _mm_set_sd(x);
|
||||
|
||||
@@ -37,36 +37,44 @@ typedef int FPU_mode_type;
|
||||
#else
|
||||
typedef int64_t FPU_mode_type;
|
||||
#endif
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||
|| defined(__MINGW32__)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64) || defined(__MINGW32__)
|
||||
#include <xmmintrin.h>
|
||||
#elif defined(_M_ARM64)
|
||||
#include <intrin.h>
|
||||
#elif defined(__PPC__)
|
||||
#include <fpu_control.h>
|
||||
extern __thread fpu_control_t fpu_control;
|
||||
#elif defined(__mips__)
|
||||
#include "mips/m32c1.h"
|
||||
#endif
|
||||
|
||||
// Set the reference hardware floating point unit to FTZ mode
|
||||
inline void ForceFTZ(FPU_mode_type *mode)
|
||||
inline void ForceFTZ(FPU_mode_type *oldMode)
|
||||
{
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||
|| defined(__MINGW32__)
|
||||
*mode = _mm_getcsr();
|
||||
_mm_setcsr(*mode | 0x8040);
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64) || defined(__MINGW32__)
|
||||
*oldMode = _mm_getcsr();
|
||||
_mm_setcsr(*oldMode | 0x8040);
|
||||
#elif defined(__PPC__)
|
||||
*mode = fpu_control;
|
||||
*oldMode = fpu_control;
|
||||
fpu_control |= _FPU_MASK_NI;
|
||||
#elif defined(__arm__)
|
||||
unsigned fpscr;
|
||||
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
*oldMode = fpscr;
|
||||
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
|
||||
// Add 64 bit support
|
||||
#elif defined(__aarch64__)
|
||||
#elif defined(__aarch64__) // Clang
|
||||
uint64_t fpscr;
|
||||
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
*oldMode = fpscr;
|
||||
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
|
||||
#elif defined(_M_ARM64) // Visual Studio
|
||||
uint64_t fpscr;
|
||||
fpscr = _ReadStatusReg(ARM64_FPSR);
|
||||
*oldMode = fpscr;
|
||||
_WriteStatusReg(ARM64_FPCR, fpscr | (1U << 24));
|
||||
#elif defined(__mips__)
|
||||
fpa_bissr(FPA_CSR_FS);
|
||||
#else
|
||||
@@ -75,26 +83,31 @@ inline void ForceFTZ(FPU_mode_type *mode)
|
||||
}
|
||||
|
||||
// Disable the denorm flush to zero
|
||||
inline void DisableFTZ(FPU_mode_type *mode)
|
||||
inline void DisableFTZ(FPU_mode_type *oldMode)
|
||||
{
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||
|| defined(__MINGW32__)
|
||||
*mode = _mm_getcsr();
|
||||
_mm_setcsr(*mode & ~0x8040);
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64) || defined(__MINGW32__)
|
||||
*oldMode = _mm_getcsr();
|
||||
_mm_setcsr(*oldMode & ~0x8040);
|
||||
#elif defined(__PPC__)
|
||||
// Save the current control word through this function's parameter, which is
// named oldMode here; `mode` is not declared in DisableFTZ.
*oldMode = fpu_control;
|
||||
fpu_control &= ~_FPU_MASK_NI;
|
||||
#elif defined(__arm__)
|
||||
unsigned fpscr;
|
||||
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
*oldMode = fpscr;
|
||||
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
|
||||
// Add 64 bit support
|
||||
#elif defined(__aarch64__)
|
||||
#elif defined(__aarch64__) // Clang
|
||||
uint64_t fpscr;
|
||||
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
*oldMode = fpscr;
|
||||
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
|
||||
#elif defined(_M_ARM64) // Visual Studio
|
||||
uint64_t fpscr;
|
||||
fpscr = _ReadStatusReg(ARM64_FPSR);
|
||||
*oldMode = fpscr;
|
||||
_WriteStatusReg(ARM64_FPCR, fpscr & ~(1U << 24));
|
||||
#elif defined(__mips__)
|
||||
fpa_bicsr(FPA_CSR_FS);
|
||||
#else
|
||||
@@ -105,16 +118,18 @@ inline void DisableFTZ(FPU_mode_type *mode)
|
||||
// Restore the reference hardware to floating point state indicated by *mode
|
||||
inline void RestoreFPState(FPU_mode_type *mode)
|
||||
{
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||
|| defined(__MINGW32__)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64) || defined(__MINGW32__)
|
||||
_mm_setcsr(*mode);
|
||||
#elif defined(__PPC__)
|
||||
fpu_control = *mode;
|
||||
#elif defined(__arm__)
|
||||
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
|
||||
// Add 64 bit support
|
||||
#elif defined(__aarch64__)
|
||||
#elif defined(__aarch64__) // Clang
|
||||
__asm__ volatile("msr fpcr, %0" ::"r"(*mode));
|
||||
#elif defined(_M_ARM64) // Visual Studio
|
||||
_WriteStatusReg(ARM64_FPCR, *mode);
|
||||
#elif defined(__mips__)
|
||||
// Mips runs by default with DAZ=1 FTZ=1
|
||||
#else
|
||||
@@ -125,4 +140,4 @@ inline void RestoreFPState(FPU_mode_type *mode)
|
||||
#error ForceFTZ and RestoreFPState need implentations
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@@ -786,7 +786,9 @@ int __builtin_clz(unsigned int pattern)
|
||||
#endif // !__has_builtin(__builtin_clz)
|
||||
|
||||
#include <intrin.h>
|
||||
#if !defined(_M_ARM64)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
int usleep(int usec)
|
||||
{
|
||||
|
||||
@@ -193,7 +193,8 @@ RoundingMode get_round(void)
|
||||
// basic_test_conversions.c in which case, these function are at
|
||||
// liberty to do nothing.
|
||||
//
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64)
|
||||
#include <xmmintrin.h>
|
||||
#elif defined(__PPC__)
|
||||
#include <fpu_control.h>
|
||||
@@ -203,18 +204,24 @@ RoundingMode get_round(void)
|
||||
void *FlushToZero(void)
|
||||
{
|
||||
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64)
|
||||
union {
|
||||
unsigned int i;
|
||||
void *p;
|
||||
} u = { _mm_getcsr() };
|
||||
_mm_setcsr(u.i | 0x8040);
|
||||
return u.p;
|
||||
#elif defined(__arm__) || defined(__aarch64__)
|
||||
#elif defined(__arm__) || defined(__aarch64__) // Clang
|
||||
int64_t fpscr;
|
||||
_FPU_GETCW(fpscr);
|
||||
_FPU_SETCW(fpscr | FPSCR_FZ);
|
||||
return NULL;
|
||||
#elif defined(_M_ARM64) // Visual Studio
|
||||
uint64_t fpscr;
|
||||
fpscr = _ReadStatusReg(ARM64_FPSR);
|
||||
_WriteStatusReg(ARM64_FPCR, fpscr | (1U << 24));
|
||||
return NULL;
|
||||
#elif defined(__PPC__)
|
||||
fpu_control_t flags = 0;
|
||||
_FPU_GETCW(flags);
|
||||
@@ -237,16 +244,21 @@ void *FlushToZero(void)
|
||||
void UnFlushToZero(void *p)
|
||||
{
|
||||
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64)
|
||||
union {
|
||||
void *p;
|
||||
unsigned int i;
|
||||
} u = { p };
|
||||
_mm_setcsr(u.i);
|
||||
#elif defined(__arm__) || defined(__aarch64__)
|
||||
#elif defined(__arm__) || defined(__aarch64__) // Clang
|
||||
int64_t fpscr;
|
||||
_FPU_GETCW(fpscr);
|
||||
_FPU_SETCW(fpscr & ~FPSCR_FZ);
|
||||
#elif defined(_M_ARM64) // Visual Studio
|
||||
uint64_t fpscr;
|
||||
fpscr = _ReadStatusReg(ARM64_FPSR);
|
||||
_WriteStatusReg(ARM64_FPCR, fpscr & ~(1U << 24));
|
||||
#elif defined(__PPC__)
|
||||
fpu_control_t flags = 0;
|
||||
_FPU_GETCW(flags);
|
||||
|
||||
Reference in New Issue
Block a user