mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
initial RISC-V support (#2614)
Unlike the related PR #2344, which simply warns about unsupported FTZ, this PR attempts to handle FTZ correctly on RISC-V. The RISC-V 'f' extension does not provide any way to enable or disable flushing subnormals to zero; implementations are required to always support subnormals. Therefore, this PR re-uses the FTZ handling code from PPC, where flushing also has to be performed explicitly.
This commit is contained in:
@@ -191,7 +191,7 @@ double sse_mul_sd(double x, double y)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __PPC__
|
||||
#if defined(__PPC__) || defined(__riscv)
|
||||
float ppc_mul(float a, float b)
|
||||
{
|
||||
float p;
|
||||
@@ -630,9 +630,11 @@ test_status InitCL( cl_device_id device )
|
||||
// turn that off
|
||||
f3[i] = sse_mul(q, q2);
|
||||
f4[i] = sse_mul(-q, q2);
|
||||
#elif defined(__PPC__)
|
||||
// None of the current generation PPC processors support HW
|
||||
// FTZ, emulate it in sw.
|
||||
#elif (defined(__PPC__) || defined(__riscv))
|
||||
// RISC-V CPUs with default 'f' fp32 extension do not support
|
||||
// enabling/disabling FTZ mode, subnormals are always handled
|
||||
// without FTZ. None of the current generation PPC processors
|
||||
// support HW FTZ, emulate it in sw.
|
||||
f3[i] = ppc_mul(q, q2);
|
||||
f4[i] = ppc_mul(-q, q2);
|
||||
#else
|
||||
@@ -721,9 +723,10 @@ test_status InitCL( cl_device_id device )
|
||||
skipTest[j][i] = (bufSkip[i] ||
|
||||
(gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)))));
|
||||
|
||||
#if defined(__PPC__)
|
||||
// Since the current Power processors don't emulate flush to zero in HW,
|
||||
// it must be emulated in SW instead.
|
||||
#if defined(__PPC__) || defined(__riscv)
|
||||
// Since the current Power processors don't support flush to
|
||||
// zero in HW, it must be emulated in SW instead. (same for
|
||||
// RISC-V CPUs with 'f' extension)
|
||||
if (gForceFTZ)
|
||||
{
|
||||
if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f))
|
||||
@@ -760,7 +763,6 @@ test_status InitCL( cl_device_id device )
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
double *f = (double*) buf1;
|
||||
double *f2 = (double*) buf2;
|
||||
double *f3 = (double*) buf3_double;
|
||||
|
||||
@@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
|
||||
uint64_t GetTime(void);
|
||||
|
||||
void WriteInputBufferComplete(void *);
|
||||
void *FlushToZero(void);
|
||||
void UnFlushToZero(void *);
|
||||
}
|
||||
|
||||
struct CalcRefValsBase
|
||||
|
||||
@@ -859,7 +859,9 @@ double reference_add(double x, double y)
|
||||
__m128 vb = _mm_set_ss((float)b);
|
||||
va = _mm_add_ss(va, vb);
|
||||
_mm_store_ss((float *)&a, va);
|
||||
#elif defined(__PPC__)
|
||||
#elif defined(__PPC__) || defined(__riscv)
|
||||
// RISC-V CPUs with default 'f' fp32 extension do not support any way to
|
||||
// enable/disable FTZ mode, subnormals are always handled without flushing.
|
||||
// Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
|
||||
// denorms to zero. As such, the reference add with FTZ must be emulated in
|
||||
// sw.
|
||||
@@ -876,7 +878,7 @@ double reference_add(double x, double y)
|
||||
} ub;
|
||||
ub.d = b;
|
||||
cl_uint mantA, mantB;
|
||||
cl_ulong addendA, addendB, sum;
|
||||
cl_ulong addendA, addendB;
|
||||
int expA = extractf(a, &mantA);
|
||||
int expB = extractf(b, &mantB);
|
||||
cl_uint signA = ua.u & 0x80000000U;
|
||||
@@ -972,7 +974,7 @@ double reference_multiply(double x, double y)
|
||||
__m128 vb = _mm_set_ss((float)b);
|
||||
va = _mm_mul_ss(va, vb);
|
||||
_mm_store_ss((float *)&a, va);
|
||||
#elif defined(__PPC__)
|
||||
#elif defined(__PPC__) || defined(__riscv)
|
||||
// Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
|
||||
// denorms to zero. As such, the reference multiply with FTZ must be
|
||||
// emulated in sw.
|
||||
@@ -3351,7 +3353,7 @@ long double reference_cbrtl(long double x)
|
||||
|
||||
long double reference_rintl(long double x)
|
||||
{
|
||||
#if defined(__PPC__)
|
||||
#if defined(__PPC__) || defined(__riscv)
|
||||
// On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
|
||||
// mantissa can represent more than LDBL_MANT_DIG binary digits.
|
||||
x = rintl(x);
|
||||
|
||||
Reference in New Issue
Block a user