mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Added corrections to re-enable reciprocal test in math_brute_force suite for relaxed math mode (#2221)
Fixes #2145. As suggested by @svenvh, reciprocal has different precision requirements than divide. This PR introduces a special path for reciprocal in binary_float_operator to test reciprocal with relaxed math. If this PR is approved, PR #2162 should be invalidated.
This commit is contained in:
@@ -214,6 +214,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_double *s;
|
||||
cl_double *s2;
|
||||
|
||||
bool reciprocal = strcmp(name, "reciprocal") == 0;
|
||||
const double reciprocalArrayX[] = { 1.0 };
|
||||
const double *specialValuesX =
|
||||
reciprocal ? reciprocalArrayX : specialValues;
|
||||
size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
|
||||
|
||||
Force64BitFPUPrecision();
|
||||
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
@@ -242,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
|
||||
cl_uint idx = 0;
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||
int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
// Test edge cases
|
||||
@@ -252,14 +258,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_double *fp2 = (cl_double *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesCount;
|
||||
x = (job_id * buffer_elements) % specialValuesCountX;
|
||||
y = (job_id * buffer_elements) / specialValuesCount;
|
||||
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
fp[idx] = specialValues[x];
|
||||
fp[idx] = specialValuesX[x];
|
||||
fp2[idx] = specialValues[y];
|
||||
if (++x >= specialValuesCount)
|
||||
++x;
|
||||
if (x >= specialValuesCountX)
|
||||
{
|
||||
x = 0;
|
||||
y++;
|
||||
@@ -271,7 +278,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Init any remaining values
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[idx] = genrand_int64(d);
|
||||
p[idx] =
|
||||
reciprocal ? ((cl_ulong *)specialValuesX)[0] : genrand_int64(d);
|
||||
p2[idx] = genrand_int64(d);
|
||||
}
|
||||
|
||||
@@ -375,6 +383,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||
s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||
|
||||
if (reciprocal)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (float)func.f_f(s2[j]);
|
||||
else
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (cl_double)func.f_ff(s[j], s2[j]);
|
||||
|
||||
@@ -406,7 +419,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
if (t[j] != q[j])
|
||||
{
|
||||
cl_double test = ((cl_double *)q)[j];
|
||||
long double correct = func.f_ff(s[j], s2[j]);
|
||||
long double correct =
|
||||
reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
|
||||
|
||||
float err = Bruteforce_Ulp_Error_Double(test, correct);
|
||||
int fail = !(fabsf(err) <= ulps);
|
||||
|
||||
@@ -479,8 +494,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
else if (IsDoubleSubnormal(s2[j]))
|
||||
{
|
||||
long double correct2 = func.f_ff(s[j], 0.0);
|
||||
long double correct3 = func.f_ff(s[j], -0.0);
|
||||
long double correct2 =
|
||||
reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
|
||||
long double correct3 =
|
||||
reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
|
||||
|
||||
float err2 =
|
||||
Bruteforce_Ulp_Error_Double(test, correct2);
|
||||
float err3 =
|
||||
|
||||
@@ -208,6 +208,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_float *s2 = 0;
|
||||
RoundingMode oldRoundMode;
|
||||
|
||||
bool reciprocal = strcmp(name, "reciprocal") == 0;
|
||||
const float reciprocalArrayX[] = { 1.f };
|
||||
const float *specialValuesX = reciprocal ? reciprocalArrayX : specialValues;
|
||||
size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
|
||||
|
||||
if (relaxedMode)
|
||||
{
|
||||
func = job->f->rfunc;
|
||||
@@ -239,7 +244,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||
cl_uint idx = 0;
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesCount;
|
||||
int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
@@ -247,15 +252,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Insert special values
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesCount;
|
||||
x = (job_id * buffer_elements) % specialValuesCountX;
|
||||
y = (job_id * buffer_elements) / specialValuesCount;
|
||||
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[idx] = ((cl_uint *)specialValues)[x];
|
||||
p[idx] = ((cl_uint *)specialValuesX)[x];
|
||||
p2[idx] = ((cl_uint *)specialValues)[y];
|
||||
++x;
|
||||
if (x >= specialValuesCount)
|
||||
if (x >= specialValuesCountX)
|
||||
{
|
||||
x = 0;
|
||||
y++;
|
||||
@@ -269,13 +274,19 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
|
||||
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
|
||||
}
|
||||
else if (relaxedMode && reciprocal)
|
||||
{
|
||||
cl_uint p2j = p2[idx] & 0x7fffffff;
|
||||
// Replace values outside [2^-126, 2^126] with QNaN
|
||||
if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Init any remaining values
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[idx] = genrand_int32(d);
|
||||
p[idx] = reciprocal ? ((cl_uint *)specialValuesX)[0] : genrand_int32(d);
|
||||
p2[idx] = genrand_int32(d);
|
||||
|
||||
if (relaxedMode && strcmp(name, "divide") == 0)
|
||||
@@ -286,6 +297,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
|
||||
if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
|
||||
}
|
||||
else if (relaxedMode && reciprocal)
|
||||
{
|
||||
cl_uint p2j = p2[idx] & 0x7fffffff;
|
||||
// Replace values outside [2^-126, 2^126] with QNaN
|
||||
if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
|
||||
}
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -402,11 +419,24 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||
if (gInfNanSupport)
|
||||
{
|
||||
if (reciprocal)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (float)func.f_f(s2[j]);
|
||||
else
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
r[j] = (float)func.f_ff(s[j], s2[j]);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (reciprocal)
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
r[j] = (float)func.f_f(s2[j]);
|
||||
overflow[j] =
|
||||
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
|
||||
}
|
||||
else
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
@@ -448,7 +478,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
if (t[j] != q[j])
|
||||
{
|
||||
float test = ((float *)q)[j];
|
||||
double correct = func.f_ff(s[j], s2[j]);
|
||||
double correct =
|
||||
reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or
|
||||
// output is a infinity or NaN or overflow
|
||||
@@ -485,7 +516,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
// retry per section 6.5.3.3
|
||||
if (IsFloatSubnormal(s[j]))
|
||||
if (!reciprocal && IsFloatSubnormal(s[j]))
|
||||
{
|
||||
double correct2, correct3;
|
||||
float err2, err3;
|
||||
@@ -591,8 +622,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
|
||||
|
||||
correct2 = func.f_ff(s[j], 0.0);
|
||||
correct3 = func.f_ff(s[j], -0.0);
|
||||
correct2 =
|
||||
reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
|
||||
correct3 =
|
||||
reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an
|
||||
// input or output is a infinity or NaN or overflow
|
||||
@@ -625,7 +658,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (fabsf(err) > tinfo->maxError)
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
|
||||
@@ -120,6 +120,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
std::vector<float> s(0), s2(0);
|
||||
RoundingMode oldRoundMode;
|
||||
|
||||
bool reciprocal = strcmp(name, "reciprocal") == 0;
|
||||
const cl_half reciprocalArrayHalfX[] = { 0x3c00 };
|
||||
const cl_half *specialValuesHalfX =
|
||||
reciprocal ? reciprocalArrayHalfX : specialValuesHalf;
|
||||
size_t specialValuesHalfCountX = reciprocal ? 1 : specialValuesHalfCount;
|
||||
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_half *out[VECTOR_SIZE_COUNT];
|
||||
|
||||
@@ -148,7 +154,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements;
|
||||
cl_uint idx = 0;
|
||||
int totalSpecialValueCount =
|
||||
specialValuesHalfCount * specialValuesHalfCount;
|
||||
specialValuesHalfCountX * specialValuesHalfCount;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
@@ -156,14 +162,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Insert special values
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesHalfCount;
|
||||
x = (job_id * buffer_elements) % specialValuesHalfCountX;
|
||||
y = (job_id * buffer_elements) / specialValuesHalfCount;
|
||||
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[idx] = specialValuesHalf[x];
|
||||
p[idx] = specialValuesHalfX[x];
|
||||
p2[idx] = specialValuesHalf[y];
|
||||
if (++x >= specialValuesHalfCount)
|
||||
++x;
|
||||
if (x >= specialValuesHalfCountX)
|
||||
{
|
||||
x = 0;
|
||||
y++;
|
||||
@@ -175,7 +182,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Init any remaining values
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[idx] = (cl_half)genrand_int32(d);
|
||||
p[idx] = reciprocal ? ((cl_half *)specialValuesHalfX)[0]
|
||||
: (cl_half)genrand_int32(d);
|
||||
p2[idx] = (cl_half)genrand_int32(d);
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
@@ -283,12 +291,24 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
s.resize(buffer_elements);
|
||||
s2.resize(buffer_elements);
|
||||
|
||||
if (reciprocal)
|
||||
{
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
s[j] = HTF(p[j]);
|
||||
s2[j] = HTF(p2[j]);
|
||||
r[j] = HFF(func.f_f(s2[j]));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
s[j] = HTF(p[j]);
|
||||
s2[j] = HTF(p2[j]);
|
||||
r[j] = HFF(func.f_ff(s[j], s2[j]));
|
||||
}
|
||||
}
|
||||
|
||||
if (ftz) RestoreFPState(&oldMode);
|
||||
|
||||
@@ -320,7 +340,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
if (r[j] != q[j])
|
||||
{
|
||||
float test = HTF(q[j]);
|
||||
float correct = func.f_ff(s[j], s2[j]);
|
||||
float correct =
|
||||
reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or
|
||||
// output is a infinity or NaN or overflow
|
||||
@@ -446,9 +467,10 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
double correct2, correct3;
|
||||
float err2, err3;
|
||||
|
||||
correct2 = func.f_ff(s[j], 0.0);
|
||||
correct3 = func.f_ff(s[j], -0.0);
|
||||
|
||||
correct2 =
|
||||
reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
|
||||
correct3 =
|
||||
reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an
|
||||
// input or output is a infinity or NaN or overflow
|
||||
|
||||
@@ -78,6 +78,10 @@
|
||||
#define reference_copysign NULL
|
||||
#define reference_sqrt NULL
|
||||
#define reference_sqrtl NULL
|
||||
#define reference_reciprocal NULL
|
||||
#define reference_reciprocall NULL
|
||||
#define reference_relaxed_reciprocal NULL
|
||||
|
||||
#define reference_divide NULL
|
||||
#define reference_dividel NULL
|
||||
#define reference_relaxed_divide NULL
|
||||
@@ -346,7 +350,6 @@ const Func functionList[] = {
|
||||
|
||||
ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY(powr, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF),
|
||||
//ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
|
||||
ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
@@ -418,6 +421,21 @@ const Func functionList[] = {
|
||||
// basic operations
|
||||
OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
//ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
|
||||
{ "reciprocal",
|
||||
"/",
|
||||
{ (void*)reference_reciprocal },
|
||||
{ (void*)reference_reciprocall },
|
||||
{ (void*)reference_relaxed_reciprocal },
|
||||
2.5f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
3.0f,
|
||||
2.5f,
|
||||
INFINITY,
|
||||
FTZ_OFF,
|
||||
RELAXED_ON,
|
||||
binaryOperatorF },
|
||||
{ "divide",
|
||||
"/",
|
||||
{ (void*)reference_divide },
|
||||
|
||||
@@ -154,7 +154,7 @@ static int doTest(const char *name)
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (func_data->func.p == NULL)
|
||||
if (func_data->func.p == NULL && func_data->rfunc.p == NULL)
|
||||
{
|
||||
vlog("'%s' is missing implementation, skipping function.\n",
|
||||
func_data->name);
|
||||
@@ -308,9 +308,10 @@ static test_definition test_list[] = {
|
||||
ADD_TEST(half_log), ADD_TEST(half_log2), ADD_TEST(half_log10),
|
||||
ADD_TEST(half_powr), ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
|
||||
ADD_TEST(half_sin), ADD_TEST(half_sqrt), ADD_TEST(half_tan),
|
||||
ADD_TEST(add), ADD_TEST(subtract), ADD_TEST(divide),
|
||||
ADD_TEST(divide_cr), ADD_TEST(multiply), ADD_TEST(assignment),
|
||||
ADD_TEST(not ), ADD_TEST(erf), ADD_TEST(erfc),
|
||||
ADD_TEST(add), ADD_TEST(subtract), ADD_TEST(reciprocal),
|
||||
ADD_TEST(divide), ADD_TEST(divide_cr), ADD_TEST(multiply),
|
||||
ADD_TEST(assignment), ADD_TEST(not ), ADD_TEST(erf),
|
||||
ADD_TEST(erfc),
|
||||
};
|
||||
|
||||
#undef ADD_TEST
|
||||
|
||||
@@ -1856,6 +1856,13 @@ double reference_logb(double x)
|
||||
|
||||
// Relaxed-math reference for the reciprocal.
// The argument is narrowed to float first and the division is written with
// float operands (1.0f over the narrowed value); the quotient is widened back
// to double when it is returned.
double reference_relaxed_reciprocal(double x)
{
    const float denominator = (float)x;
    const float numerator = 1.0f;
    return numerator / denominator;
}
|
||||
|
||||
// Reference reciprocal for the long double entry point.
// Note that the argument is narrowed to double before dividing: both operands
// of the division are declared double, and the quotient is converted back to
// long double on return.
long double reference_reciprocall(long double y)
|
||||
{
|
||||
double dx = 1.0; // numerator, held as a double
|
||||
double dy = y; // narrows the long double argument to double
|
||||
return dx / dy; // quotient of two doubles, widened to long double by the return
|
||||
}
|
||||
|
||||
// Double-precision reference for the reciprocal: returns 1.0 divided by x.
double reference_reciprocal(double x)
{
    const double one = 1.0;
    return one / x;
}
|
||||
|
||||
double reference_remainder(double x, double y)
|
||||
@@ -3740,9 +3747,6 @@ long double reference_nanl(cl_ulong x)
|
||||
return (long double)u.f;
|
||||
}
|
||||
|
||||
|
||||
// Long double reference for the reciprocal: returns 1.0L divided by x, with
// both operands kept as long double.
long double reference_reciprocall(long double x)
{
    const long double one = 1.0L;
    return one / x;
}
|
||||
|
||||
long double reference_remainderl(long double x, long double y)
|
||||
{
|
||||
int i;
|
||||
|
||||
Reference in New Issue
Block a user