diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 4dce5052..35d93f84 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -214,12 +214,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_double *s; cl_double *s2; - bool reciprocal = strcmp(name, "reciprocal") == 0; - const double reciprocalArrayX[] = { 1.0 }; - const double *specialValuesX = - reciprocal ? reciprocalArrayX : specialValues; - size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount; - Force64BitFPUPrecision(); cl_event e[VECTOR_SIZE_COUNT]; @@ -248,7 +242,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements; cl_uint idx = 0; - int totalSpecialValueCount = specialValuesCountX * specialValuesCount; + int totalSpecialValueCount = specialValuesCount * specialValuesCount; int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; // Test edge cases @@ -258,15 +252,14 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_double *fp2 = (cl_double *)p2; uint32_t x, y; - x = (job_id * buffer_elements) % specialValuesCountX; + x = (job_id * buffer_elements) % specialValuesCount; y = (job_id * buffer_elements) / specialValuesCount; for (; idx < buffer_elements; idx++) { - fp[idx] = specialValuesX[x]; + fp[idx] = specialValues[x]; fp2[idx] = specialValues[y]; - ++x; - if (x >= specialValuesCountX) + if (++x >= specialValuesCount) { x = 0; y++; @@ -278,8 +271,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // Init any remaining values for (; idx < buffer_elements; idx++) { - p[idx] = - reciprocal ? ((cl_ulong *)specialValuesX)[0] : genrand_int64(d); + p[idx] = genrand_int64(d); p2[idx] = genrand_int64(d); } @@ -372,13 +364,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) r = (cl_double *)gOut_Ref + thread_id * buffer_elements; s = (cl_double *)gIn + thread_id * buffer_elements; s2 = (cl_double *)gIn2 + thread_id * buffer_elements; - - if (reciprocal) - for (size_t j = 0; j < buffer_elements; j++) - r[j] = (float)func.f_f(s2[j]); - else - for (size_t j = 0; j < buffer_elements; j++) - r[j] = (cl_double)func.f_ff(s[j], s2[j]); + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (cl_double)func.f_ff(s[j], s2[j]); // Read the data back -- no need to wait for the first N-1 buffers but wait // for the last buffer. This is an in order queue. @@ -408,9 +395,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (t[j] != q[j]) { cl_double test = ((cl_double *)q)[j]; - long double correct = - reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]); - + long double correct = func.f_ff(s[j], s2[j]); float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = !(fabsf(err) <= ulps); @@ -483,11 +468,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } else if (IsDoubleSubnormal(s2[j])) { - long double correct2 = - reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0); - long double correct3 = - reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0); - + long double correct2 = func.f_ff(s[j], 0.0); + long double correct3 = func.f_ff(s[j], -0.0); float err2 = Bruteforce_Ulp_Error_Double(test, correct2); float err3 = diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index c0c11c2e..cce6e122 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -208,11 +208,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_float *s2 = 0; RoundingMode oldRoundMode; - bool reciprocal = strcmp(name, "reciprocal") == 0; - const float reciprocalArrayX[] = { 1.f }; - const float *specialValuesX = reciprocal ? reciprocalArrayX : specialValues; - size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount; - if (relaxedMode) { func = job->f->rfunc; @@ -244,7 +239,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; cl_uint idx = 0; - int totalSpecialValueCount = specialValuesCountX * specialValuesCount; + int totalSpecialValueCount = specialValuesCount * specialValuesCount; int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; if (job_id <= (cl_uint)lastSpecialJobIndex) @@ -252,15 +247,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // Insert special values uint32_t x, y; - x = (job_id * buffer_elements) % specialValuesCountX; + x = (job_id * buffer_elements) % specialValuesCount; y = (job_id * buffer_elements) / specialValuesCount; for (; idx < buffer_elements; idx++) { - p[idx] = ((cl_uint *)specialValuesX)[x]; + p[idx] = ((cl_uint *)specialValues)[x]; p2[idx] = ((cl_uint *)specialValues)[y]; ++x; - if (x >= specialValuesCountX) + if (x >= specialValuesCount) { x = 0; y++; @@ -274,19 +269,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000; if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000; } - else if (relaxedMode && reciprocal) - { - cl_uint p2j = p2[idx] & 0x7fffffff; - // Replace values outside [2^-126, 2^126] with QNaN - if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000; - } } } // Init any remaining values for (; idx < buffer_elements; idx++) { - p[idx] = reciprocal ? ((cl_uint *)specialValuesX)[0] : genrand_int32(d); + p[idx] = genrand_int32(d); p2[idx] = genrand_int32(d); if (relaxedMode && strcmp(name, "divide") == 0) @@ -297,12 +286,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000; if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000; } - else if (relaxedMode && reciprocal) - { - cl_uint p2j = p2[idx] & 0x7fffffff; - // Replace values outside [2^-126, 2^126] with QNaN - if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000; - } } if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, @@ -408,31 +391,18 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) s2 = (float *)gIn2 + thread_id * buffer_elements; if (gInfNanSupport) { - if (reciprocal) - for (size_t j = 0; j < buffer_elements; j++) - r[j] = (float)func.f_f(s2[j]); - else - for (size_t j = 0; j < buffer_elements; j++) - r[j] = (float)func.f_ff(s[j], s2[j]); + for (size_t j = 0; j < buffer_elements; j++) + r[j] = (float)func.f_ff(s[j], s2[j]); } else { - if (reciprocal) - for (size_t j = 0; j < buffer_elements; j++) - { - feclearexcept(FE_OVERFLOW); - r[j] = (float)func.f_f(s2[j]); - overflow[j] = - FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); - } - else - for (size_t j = 0; j < buffer_elements; j++) - { - feclearexcept(FE_OVERFLOW); - r[j] = (float)func.f_ff(s[j], s2[j]); - overflow[j] = - FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); - } + for (size_t j = 0; j < buffer_elements; j++) + { + feclearexcept(FE_OVERFLOW); + r[j] = (float)func.f_ff(s[j], s2[j]); + overflow[j] = + FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } } if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat); @@ -467,8 +437,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (t[j] != q[j]) { float test = ((float *)q)[j]; - double correct = - reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]); + double correct = func.f_ff(s[j], s2[j]); // Per section 10 paragraph 6, accept any result if an input or // output is a infinity or NaN or overflow @@ -505,7 +474,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } // retry per section 6.5.3.3 - if (!reciprocal && IsFloatSubnormal(s[j])) + if (IsFloatSubnormal(s[j])) { double correct2, correct3; float err2, err3; @@ -611,10 +580,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (!gInfNanSupport) feclearexcept(FE_OVERFLOW); - correct2 = - reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0); - correct3 = - reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0); + correct2 = func.f_ff(s[j], 0.0); + correct3 = func.f_ff(s[j], -0.0); // Per section 10 paragraph 6, accept any result if an // input or output is a infinity or NaN or overflow @@ -647,6 +614,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } } + if (fabsf(err) > tinfo->maxError) { tinfo->maxError = fabsf(err); diff --git a/test_conformance/math_brute_force/binary_operator_half.cpp b/test_conformance/math_brute_force/binary_operator_half.cpp index 3bd45857..09d3ea01 100644 --- a/test_conformance/math_brute_force/binary_operator_half.cpp +++ b/test_conformance/math_brute_force/binary_operator_half.cpp @@ -120,12 +120,6 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) std::vector s(0), s2(0); RoundingMode oldRoundMode; - bool reciprocal = strcmp(name, "reciprocal") == 0; - const cl_half reciprocalArrayHalfX[] = { 0x3c00 }; - const cl_half *specialValuesHalfX = - reciprocal ? reciprocalArrayHalfX : specialValuesHalf; - size_t specialValuesHalfCountX = reciprocal ? 1 : specialValuesHalfCount; - cl_event e[VECTOR_SIZE_COUNT]; cl_half *out[VECTOR_SIZE_COUNT]; @@ -154,7 +148,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements; cl_uint idx = 0; int totalSpecialValueCount = - specialValuesHalfCountX * specialValuesHalfCount; + specialValuesHalfCount * specialValuesHalfCount; int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; if (job_id <= (cl_uint)lastSpecialJobIndex) @@ -162,15 +156,14 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) // Insert special values uint32_t x, y; - x = (job_id * buffer_elements) % specialValuesHalfCountX; + x = (job_id * buffer_elements) % specialValuesHalfCount; y = (job_id * buffer_elements) / specialValuesHalfCount; for (; idx < buffer_elements; idx++) { - p[idx] = specialValuesHalfX[x]; + p[idx] = specialValuesHalf[x]; p2[idx] = specialValuesHalf[y]; - ++x; - if (x >= specialValuesHalfCountX) + if (++x >= specialValuesHalfCount) { x = 0; y++; @@ -182,8 +175,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) // Init any remaining values for (; idx < buffer_elements; idx++) { - p[idx] = reciprocal ? ((cl_half *)specialValuesHalfX)[0] - : (cl_half)genrand_int32(d); + p[idx] = (cl_half)genrand_int32(d); p2[idx] = (cl_half)genrand_int32(d); } if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, @@ -280,23 +272,11 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) s.resize(buffer_elements); s2.resize(buffer_elements); - if (reciprocal) + for (size_t j = 0; j < buffer_elements; j++) { - for (size_t j = 0; j < buffer_elements; j++) - { - s[j] = HTF(p[j]); - s2[j] = HTF(p2[j]); - r[j] = HFF(func.f_f(s2[j])); - } - } - else - { - for (size_t j = 0; j < buffer_elements; j++) - { - s[j] = HTF(p[j]); - s2[j] = HTF(p2[j]); - r[j] = HFF(func.f_ff(s[j], s2[j])); - } + s[j] = HTF(p[j]); + s2[j] = HTF(p2[j]); + r[j] = HFF(func.f_ff(s[j], s2[j])); } if (ftz) RestoreFPState(&oldMode); @@ -329,8 +309,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) if (r[j] != q[j]) { float test = HTF(q[j]); - float correct = - reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]); + float correct = func.f_ff(s[j], s2[j]); // Per section 10 paragraph 6, accept any result if an input or // output is a infinity or NaN or overflow @@ -456,10 +435,9 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data) double correct2, correct3; float err2, err3; - correct2 = - reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0); - correct3 = - reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0); + correct2 = func.f_ff(s[j], 0.0); + correct3 = func.f_ff(s[j], -0.0); + // Per section 10 paragraph 6, accept any result if an // input or output is a infinity or NaN or overflow diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp index 14e0830a..74f29930 100644 --- a/test_conformance/math_brute_force/function_list.cpp +++ b/test_conformance/math_brute_force/function_list.cpp @@ -427,9 +427,8 @@ const Func functionList[] = { // basic operations OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF), OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF), - //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF), { "reciprocal", - "/", + "reciprocal", { (void*)reference_reciprocal }, { (void*)reference_reciprocall }, { (void*)reference_relaxed_reciprocal }, @@ -442,7 +441,7 @@ const Func functionList[] = { INFINITY, FTZ_OFF, RELAXED_ON, - binaryOperatorF }, + unaryF}, { "divide", "/", { (void*)reference_divide }, diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 4762a81d..5c537332 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -29,7 +29,12 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo &info = *(BuildKernelInfo *)p; auto generator = [](const std::string &kernel_name, const char *builtin, cl_uint vector_size_index) { - return GetUnaryKernel(kernel_name, builtin, ParameterType::Double, + const char *builtinCall = builtin; + if (strcmp(builtin, "reciprocal") == 0) + { + builtinCall = "((RETTYPE)(1.0))/"; + } + return GetUnaryKernel(kernel_name, builtinCall, ParameterType::Double, ParameterType::Double, vector_size_index); }; return BuildKernels(info, job_id, generator); diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 0a2af3be..ae0a0646 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -28,7 +28,12 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo &info = *(BuildKernelInfo *)p; auto generator = [](const std::string &kernel_name, const char *builtin, cl_uint vector_size_index) { - return GetUnaryKernel(kernel_name, builtin, ParameterType::Float, + const char *builtinCall = builtin; + if (strcmp(builtin, "reciprocal") == 0) + { + builtinCall = "((RETTYPE)(1.0f))/"; + } + return GetUnaryKernel(kernel_name, builtinCall, ParameterType::Float, ParameterType::Float, vector_size_index); }; return BuildKernels(info, job_id, generator); diff --git a/test_conformance/math_brute_force/unary_half.cpp b/test_conformance/math_brute_force/unary_half.cpp index 877e1fad..56565cc3 100644 --- a/test_conformance/math_brute_force/unary_half.cpp +++ b/test_conformance/math_brute_force/unary_half.cpp @@ -28,7 +28,12 @@ cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo &info = *(BuildKernelInfo *)p; auto generator = [](const std::string &kernel_name, const char *builtin, cl_uint vector_size_index) { - return GetUnaryKernel(kernel_name, builtin, ParameterType::Half, + const char *builtinCall = builtin; + if (strcmp(builtin, "reciprocal") == 0) + { + builtinCall = "((RETTYPE)(1.0h))/"; + } + return GetUnaryKernel(kernel_name, builtinCall, ParameterType::Half, ParameterType::Half, vector_size_index); }; return BuildKernels(info, job_id, generator);