diff --git a/CMakeLists.txt b/CMakeLists.txt index 30a64447..3dbd7944 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") set(CLConform_TARGET_ARCH x86_64) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*") set(CLConform_TARGET_ARCH x86) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv.*") + set(CLConform_TARGET_ARCH RISCV) endif() if(NOT DEFINED CLConform_TARGET_ARCH) diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index afb0f5a3..9c1e0db0 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -45,6 +45,9 @@ typedef int64_t FPU_mode_type; #elif defined(__PPC__) #include extern __thread fpu_control_t fpu_control; +#elif defined(__riscv) +#define _FPU_MASK_NI 1 +static FPU_mode_type fpu_control; #elif defined(__mips__) #include "mips/m32c1.h" #endif @@ -56,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *oldMode) || defined(_M_X64) || defined(__MINGW32__) *oldMode = _mm_getcsr(); _mm_setcsr(*oldMode | 0x8040); -#elif defined(__PPC__) +#elif defined(__PPC__) || defined(__riscv) *oldMode = fpu_control; fpu_control |= _FPU_MASK_NI; #elif defined(__arm__) @@ -89,8 +92,8 @@ inline void DisableFTZ(FPU_mode_type *oldMode) || defined(_M_X64) || defined(__MINGW32__) *oldMode = _mm_getcsr(); _mm_setcsr(*oldMode & ~0x8040); -#elif defined(__PPC__) - *mode = fpu_control; +#elif defined(__PPC__) || defined(__riscv) + *oldMode = fpu_control; fpu_control &= ~_FPU_MASK_NI; #elif defined(__arm__) unsigned fpscr; @@ -121,7 +124,7 @@ inline void RestoreFPState(FPU_mode_type *mode) #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) \ || defined(_M_X64) || defined(__MINGW32__) _mm_setcsr(*mode); -#elif defined(__PPC__) +#elif defined(__PPC__) || defined(__riscv) fpu_control = *mode; #elif defined(__arm__) __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode)); diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp index 5aeb86f1..31b18f75 100644 --- a/test_common/harness/rounding_mode.cpp +++ b/test_common/harness/rounding_mode.cpp @@ -201,6 +201,7 @@ RoundingMode get_round(void) #elif defined(__mips__) #include "mips/m32c1.h" #endif + void *FlushToZero(void) { #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) @@ -231,6 +232,8 @@ void *FlushToZero(void) #elif defined(__mips__) fpa_bissr(FPA_CSR_FS); return NULL; +#elif defined(__riscv) + return NULL; #else #error Unknown arch #endif @@ -266,6 +269,8 @@ void UnFlushToZero(void *p) _FPU_SETCW(flags); #elif defined(__mips__) fpa_bicsr(FPA_CSR_FS); +#elif defined(__riscv) + return; #else #error Unknown arch #endif diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index 301b86d0..6f1d1505 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -1409,6 +1409,8 @@ void PrintArch(void) vlog("ARCH:\tWindows\n"); #elif defined(__mips__) vlog("ARCH:\tmips\n"); +#elif defined(__riscv) + vlog("ARCH:\tRISC-V\n"); #else #error unknown arch #endif diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp index 0c868764..b3f1098d 100644 --- a/test_conformance/contractions/contractions.cpp +++ b/test_conformance/contractions/contractions.cpp @@ -191,7 +191,7 @@ double sse_mul_sd(double x, double y) } #endif -#ifdef __PPC__ +#if defined(__PPC__) || defined(__riscv) float ppc_mul(float a, float b) { float p; @@ -630,9 +630,11 @@ test_status InitCL( cl_device_id device ) // turn that off f3[i] = sse_mul(q, q2); f4[i] = sse_mul(-q, q2); -#elif defined(__PPC__) - // None of the current generation PPC processors support HW - // FTZ, emulate it in sw. +#elif (defined(__PPC__) || defined(__riscv)) + // RISC-V CPUs with default 'f' fp32 extension do not support + // enabling/disabling FTZ mode, subnormals are always handled + // without FTZ. None of the current generation PPC processors + // support HW FTZ, emulate it in sw. f3[i] = ppc_mul(q, q2); f4[i] = ppc_mul(-q, q2); #else @@ -721,9 +723,10 @@ test_status InitCL( cl_device_id device ) skipTest[j][i] = (bufSkip[i] || (gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW))))); -#if defined(__PPC__) - // Since the current Power processors don't emulate flush to zero in HW, - // it must be emulated in SW instead. +#if defined(__PPC__) || defined(__riscv) + // Since the current Power processors don't emulate flush to + // zero in HW, it must be emulated in SW instead. (same for + // RISC-V CPUs with 'f' extension) if (gForceFTZ) { if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f)) @@ -760,7 +763,6 @@ test_status InitCL( cl_device_id device ) } } - double *f = (double*) buf1; double *f2 = (double*) buf2; double *f3 = (double*) buf3_double; diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index 6846f780..496ea730 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p); uint64_t GetTime(void); void WriteInputBufferComplete(void *); -void *FlushToZero(void); -void UnFlushToZero(void *); } struct CalcRefValsBase diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index a66e6f7e..183edc74 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -859,7 +859,9 @@ double reference_add(double x, double y) __m128 vb = _mm_set_ss((float)b); va = _mm_add_ss(va, vb); _mm_store_ss((float *)&a, va); -#elif defined(__PPC__) +#elif defined(__PPC__) || defined(__riscv) + // RISC-V CPUs with default 'f' fp32 extension do not support any way to + // enable/disable FTZ mode, subnormals are always handled without flushing. // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes // denorm's to zero. As such, the reference add with FTZ must be emulated in // sw. @@ -876,7 +878,7 @@ double reference_add(double x, double y) } ub; ub.d = b; cl_uint mantA, mantB; - cl_ulong addendA, addendB, sum; + cl_ulong addendA, addendB; int expA = extractf(a, &mantA); int expB = extractf(b, &mantB); cl_uint signA = ua.u & 0x80000000U; @@ -972,7 +974,7 @@ double reference_multiply(double x, double y) __m128 vb = _mm_set_ss((float)b); va = _mm_mul_ss(va, vb); _mm_store_ss((float *)&a, va); -#elif defined(__PPC__) +#elif defined(__PPC__) || defined(__riscv) // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes // denorm's to zero. As such, the reference multiply with FTZ must be // emulated in sw. @@ -3351,7 +3353,7 @@ long double reference_cbrtl(long double x) long double reference_rintl(long double x) { -#if defined(__PPC__) +#if defined(__PPC__) || defined(__riscv) // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined // mantissa can represent more than LDBL_MANT_DIG binary digits. x = rintl(x);