mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Support building for Windows on 64-bit Arm (#2355)
Add support for building for Windows on Arm.
This commit is contained in:
committed by
GitHub
parent
4115d04ae0
commit
aef863afa2
2
.github/workflows/presubmit.yml
vendored
2
.github/workflows/presubmit.yml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
matrix:
|
||||
build-type: [Release]
|
||||
gl: [0]
|
||||
os: [ubuntu-22.04, macos-latest, windows-latest]
|
||||
os: [ubuntu-22.04, macos-latest, windows-latest, windows-11-arm]
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
gl: 1
|
||||
|
||||
@@ -436,7 +436,14 @@ void *ThreadPool_WorkerFunc(void *p)
|
||||
|
||||
// drop run count to 0
|
||||
gRunCount = 0;
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
_mm_mfence();
|
||||
#elif defined(_M_ARM64)
|
||||
__dmb(_ARM64_BARRIER_ISHST);
|
||||
#else
|
||||
#error Architecture needs an implementation
|
||||
#endif
|
||||
|
||||
#else
|
||||
if (pthread_mutex_lock(&gAtomicLock))
|
||||
log_error(
|
||||
@@ -703,7 +710,13 @@ void ThreadPool_Exit(void)
|
||||
// http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||
__sync_synchronize();
|
||||
#elif defined(_MSC_VER)
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
_mm_mfence();
|
||||
#elif defined(_M_ARM64)
|
||||
__dmb(_ARM64_BARRIER_ISHST);
|
||||
#else
|
||||
#error Architecture needs an implementation
|
||||
#endif
|
||||
#else
|
||||
#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
|
||||
#endif
|
||||
|
||||
@@ -23,10 +23,10 @@
|
||||
|
||||
#include <CL/cl_half.h>
|
||||
|
||||
#if defined(__SSE__) || defined(_MSC_VER)
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE2__) || defined(_MSC_VER)
|
||||
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
@@ -110,7 +110,7 @@ static long lrintf_clamped(float f)
|
||||
volatile float x = f;
|
||||
float magicVal = magic[f < 0];
|
||||
|
||||
#if defined(__SSE__) || defined(_WIN32)
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
// Defeat x87-based arithmetic, which can't do FTZ, and will round this
|
||||
// incorrectly
|
||||
__m128 v = _mm_set_ss(x);
|
||||
@@ -150,7 +150,7 @@ static long lrint_clamped(double f)
|
||||
{
|
||||
volatile double x = f;
|
||||
double magicVal = magic[f < 0];
|
||||
#if defined(__SSE2__) || (defined(_MSC_VER))
|
||||
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||
// Defeat x87-based arithmetic, which can't do FTZ, and will round this
|
||||
// incorrectly
|
||||
__m128d v = _mm_set_sd(x);
|
||||
|
||||
@@ -37,36 +37,44 @@ typedef int FPU_mode_type;
|
||||
#else
|
||||
typedef int64_t FPU_mode_type;
|
||||
#endif
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||
|| defined(__MINGW32__)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64) || defined(__MINGW32__)
|
||||
#include <xmmintrin.h>
|
||||
#elif defined(_M_ARM64)
|
||||
#include <intrin.h>
|
||||
#elif defined(__PPC__)
|
||||
#include <fpu_control.h>
|
||||
extern __thread fpu_control_t fpu_control;
|
||||
#elif defined(__mips__)
|
||||
#include "mips/m32c1.h"
|
||||
#endif
|
||||
|
||||
// Set the reference hardware floating point unit to FTZ mode
|
||||
inline void ForceFTZ(FPU_mode_type *mode)
|
||||
inline void ForceFTZ(FPU_mode_type *oldMode)
|
||||
{
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||
|| defined(__MINGW32__)
|
||||
*mode = _mm_getcsr();
|
||||
_mm_setcsr(*mode | 0x8040);
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64) || defined(__MINGW32__)
|
||||
*oldMode = _mm_getcsr();
|
||||
_mm_setcsr(*oldMode | 0x8040);
|
||||
#elif defined(__PPC__)
|
||||
*mode = fpu_control;
|
||||
*oldMode = fpu_control;
|
||||
fpu_control |= _FPU_MASK_NI;
|
||||
#elif defined(__arm__)
|
||||
unsigned fpscr;
|
||||
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
*oldMode = fpscr;
|
||||
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
|
||||
// Add 64 bit support
|
||||
#elif defined(__aarch64__)
|
||||
#elif defined(__aarch64__) // Clang
|
||||
uint64_t fpscr;
|
||||
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
*oldMode = fpscr;
|
||||
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
|
||||
#elif defined(_M_ARM64) // Visual Studio
|
||||
// Read the control register (FPCR), not the status register (FPSR): the
// value is saved for RestoreFPState, which writes it back to FPCR, and it
// is also the base for the new FPCR value below.
uint64_t fpcr;
|
||||
fpcr = _ReadStatusReg(ARM64_FPCR);
|
||||
*oldMode = fpcr;
|
||||
// Set bit 24 (FZ) to force flush-to-zero, matching the __aarch64__ branch.
_WriteStatusReg(ARM64_FPCR, fpcr | (1U << 24));
|
||||
#elif defined(__mips__)
|
||||
fpa_bissr(FPA_CSR_FS);
|
||||
#else
|
||||
@@ -75,26 +83,31 @@ inline void ForceFTZ(FPU_mode_type *mode)
|
||||
}
|
||||
|
||||
// Disable the denorm flush to zero
|
||||
inline void DisableFTZ(FPU_mode_type *mode)
|
||||
inline void DisableFTZ(FPU_mode_type *oldMode)
|
||||
{
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||
|| defined(__MINGW32__)
|
||||
*mode = _mm_getcsr();
|
||||
_mm_setcsr(*mode & ~0x8040);
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64) || defined(__MINGW32__)
|
||||
*oldMode = _mm_getcsr();
|
||||
_mm_setcsr(*oldMode & ~0x8040);
|
||||
#elif defined(__PPC__)
|
||||
// Save the current control word through the renamed parameter (oldMode),
// consistent with every other branch of this function and with ForceFTZ.
*oldMode = fpu_control;
|
||||
fpu_control &= ~_FPU_MASK_NI;
|
||||
#elif defined(__arm__)
|
||||
unsigned fpscr;
|
||||
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
*oldMode = fpscr;
|
||||
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
|
||||
// Add 64 bit support
|
||||
#elif defined(__aarch64__)
|
||||
#elif defined(__aarch64__) // Clang
|
||||
uint64_t fpscr;
|
||||
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
*oldMode = fpscr;
|
||||
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
|
||||
#elif defined(_M_ARM64) // Visual Studio
|
||||
// Read the control register (FPCR), not the status register (FPSR): the
// value is saved for RestoreFPState, which writes it back to FPCR, and it
// is also the base for the new FPCR value below.
uint64_t fpcr;
|
||||
fpcr = _ReadStatusReg(ARM64_FPCR);
|
||||
*oldMode = fpcr;
|
||||
// Clear bit 24 (FZ) to disable flush-to-zero, matching the __aarch64__
// branch.
_WriteStatusReg(ARM64_FPCR, fpcr & ~(1U << 24));
|
||||
#elif defined(__mips__)
|
||||
fpa_bicsr(FPA_CSR_FS);
|
||||
#else
|
||||
@@ -105,16 +118,18 @@ inline void DisableFTZ(FPU_mode_type *mode)
|
||||
// Restore the reference hardware to floating point state indicated by *mode
|
||||
inline void RestoreFPState(FPU_mode_type *mode)
|
||||
{
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||
|| defined(__MINGW32__)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64) || defined(__MINGW32__)
|
||||
_mm_setcsr(*mode);
|
||||
#elif defined(__PPC__)
|
||||
fpu_control = *mode;
|
||||
#elif defined(__arm__)
|
||||
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
|
||||
// Add 64 bit support
|
||||
#elif defined(__aarch64__)
|
||||
#elif defined(__aarch64__) // Clang
|
||||
__asm__ volatile("msr fpcr, %0" ::"r"(*mode));
|
||||
#elif defined(_M_ARM64) // Visual Studio
|
||||
_WriteStatusReg(ARM64_FPCR, *mode);
|
||||
#elif defined(__mips__)
|
||||
// Mips runs by default with DAZ=1 FTZ=1
|
||||
#else
|
||||
@@ -125,4 +140,4 @@ inline void RestoreFPState(FPU_mode_type *mode)
|
||||
#error ForceFTZ and RestoreFPState need implentations
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@@ -786,7 +786,9 @@ int __builtin_clz(unsigned int pattern)
|
||||
#endif // !__has_builtin(__builtin_clz)
|
||||
|
||||
#include <intrin.h>
|
||||
#if !defined(_M_ARM64)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
int usleep(int usec)
|
||||
{
|
||||
|
||||
@@ -193,7 +193,8 @@ RoundingMode get_round(void)
|
||||
// basic_test_conversions.c in which case, these function are at
|
||||
// liberty to do nothing.
|
||||
//
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64)
|
||||
#include <xmmintrin.h>
|
||||
#elif defined(__PPC__)
|
||||
#include <fpu_control.h>
|
||||
@@ -203,18 +204,24 @@ RoundingMode get_round(void)
|
||||
void *FlushToZero(void)
|
||||
{
|
||||
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64)
|
||||
union {
|
||||
unsigned int i;
|
||||
void *p;
|
||||
} u = { _mm_getcsr() };
|
||||
_mm_setcsr(u.i | 0x8040);
|
||||
return u.p;
|
||||
#elif defined(__arm__) || defined(__aarch64__)
|
||||
#elif defined(__arm__) || defined(__aarch64__) // Clang
|
||||
int64_t fpscr;
|
||||
_FPU_GETCW(fpscr);
|
||||
_FPU_SETCW(fpscr | FPSCR_FZ);
|
||||
return NULL;
|
||||
#elif defined(_M_ARM64) // Visual Studio
|
||||
// Read-modify-write the control register (FPCR). Reading the status
// register (FPSR) here would write status flags into FPCR and lose the
// existing control settings.
uint64_t fpcr;
|
||||
fpcr = _ReadStatusReg(ARM64_FPCR);
|
||||
// Set bit 24 (FZ) to enable flush-to-zero.
_WriteStatusReg(ARM64_FPCR, fpcr | (1U << 24));
|
||||
return NULL;
|
||||
#elif defined(__PPC__)
|
||||
fpu_control_t flags = 0;
|
||||
_FPU_GETCW(flags);
|
||||
@@ -237,16 +244,21 @@ void *FlushToZero(void)
|
||||
void UnFlushToZero(void *p)
|
||||
{
|
||||
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \
|
||||
|| defined(_M_X64)
|
||||
union {
|
||||
void *p;
|
||||
unsigned int i;
|
||||
} u = { p };
|
||||
_mm_setcsr(u.i);
|
||||
#elif defined(__arm__) || defined(__aarch64__)
|
||||
#elif defined(__arm__) || defined(__aarch64__) // Clang
|
||||
int64_t fpscr;
|
||||
_FPU_GETCW(fpscr);
|
||||
_FPU_SETCW(fpscr & ~FPSCR_FZ);
|
||||
#elif defined(_M_ARM64) // Visual Studio
|
||||
// Read-modify-write the control register (FPCR). Reading the status
// register (FPSR) here would write status flags into FPCR and lose the
// existing control settings.
uint64_t fpcr;
|
||||
fpcr = _ReadStatusReg(ARM64_FPCR);
|
||||
// Clear bit 24 (FZ) to turn flush-to-zero back off.
_WriteStatusReg(ARM64_FPCR, fpcr & ~(1U << 24));
|
||||
#elif defined(__PPC__)
|
||||
fpu_control_t flags = 0;
|
||||
_FPU_GETCW(flags);
|
||||
|
||||
@@ -53,17 +53,17 @@
|
||||
|
||||
#include "basic_test_conversions.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
#include <mmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#else // !_WIN32
|
||||
#else
|
||||
#if defined(__SSE__)
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#endif // _WIN32
|
||||
#endif
|
||||
|
||||
cl_context gContext = NULL;
|
||||
cl_command_queue gQueue = NULL;
|
||||
|
||||
@@ -343,7 +343,7 @@ float DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int(float f)
|
||||
volatile float x = f;
|
||||
float magicVal = magic[f < 0];
|
||||
|
||||
#if defined(__SSE__)
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
// Defeat x87-based arithmetic, which can't do FTZ, and will round this
|
||||
// incorrectly
|
||||
__m128 v = _mm_set_ss(x);
|
||||
@@ -376,7 +376,7 @@ DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int_and_clamp(double f)
|
||||
{
|
||||
volatile double x = f;
|
||||
double magicVal = magic[f < 0];
|
||||
#if defined(__SSE2__) || defined(_MSC_VER)
|
||||
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||
// Defeat x87-based arithmetic, which can't do FTZ, and will round this
|
||||
// incorrectly
|
||||
__m128d v = _mm_set_sd(x);
|
||||
@@ -479,7 +479,7 @@ void DataInfoSpec<InType, OutType, InFP, OutFP>::conv(OutType *out, InType *in)
|
||||
{
|
||||
if (std::is_same<cl_double, OutType>::value)
|
||||
{
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
double result;
|
||||
|
||||
if (std::is_same<cl_ulong, InType>::value)
|
||||
|
||||
@@ -25,12 +25,10 @@
|
||||
|
||||
#include "utility.h"
|
||||
|
||||
#if defined(__SSE__) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE2__) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
@@ -855,8 +853,7 @@ double reference_add(double x, double y)
|
||||
volatile float a = (float)x;
|
||||
volatile float b = (float)y;
|
||||
|
||||
#if defined(__SSE__) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
// defeat x87
|
||||
__m128 va = _mm_set_ss((float)a);
|
||||
__m128 vb = _mm_set_ss((float)b);
|
||||
@@ -953,8 +950,7 @@ double reference_subtract(double x, double y)
|
||||
{
|
||||
volatile float a = (float)x;
|
||||
volatile float b = (float)y;
|
||||
#if defined(__SSE__) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
// defeat x87
|
||||
__m128 va = _mm_set_ss((float)a);
|
||||
__m128 vb = _mm_set_ss((float)b);
|
||||
@@ -970,8 +966,7 @@ double reference_multiply(double x, double y)
|
||||
{
|
||||
volatile float a = (float)x;
|
||||
volatile float b = (float)y;
|
||||
#if defined(__SSE__) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
// defeat x87
|
||||
__m128 va = _mm_set_ss((float)a);
|
||||
__m128 vb = _mm_set_ss((float)b);
|
||||
|
||||
Reference in New Issue
Block a user