From 8d4a8700597dccbd81e9292620269854196b8a81 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 15 Jul 2025 09:01:19 -0700 Subject: [PATCH] fix correctly rounded behavior for math bruteforce tests (#2397) fixes #2387 Corrects the "correctly rounded" behavior for the math bruteforce tests. Specifically: * Only applies the `-cl-fp32-correctly-rounded-divide-sqrt` build option for the `divide_cr` and `sqrt_cr` tests. The other tests do not receive this build option. This means that there is a difference in the behavior of the `divide` and `divide_cr` tests and the `sqrt` and `sqrt_cr` tests, and the "correctly rounded" build option is not applied to the fp16 or fp64 tests. * Removes the build option to toggle testing the correctly rounded divide and square root tests since it no longer needed. Instead, the test names can be used to choose whether to test the correctly rounded functions or the non-correctly rounded functions. Additionally: * Relaxes the fp16 sqrt accuracy requirements to 1 ULP. This is needed to pass this test on some of our devices. This part is still under discussion, so I will keep this PR as a draft until it is settled. --- .../math_brute_force/binary_operator_float.cpp | 6 ++++-- test_conformance/math_brute_force/common.cpp | 16 ++++++++-------- test_conformance/math_brute_force/common.h | 3 +++ .../math_brute_force/function_list.cpp | 4 ++-- test_conformance/math_brute_force/main.cpp | 12 ------------ .../math_brute_force/unary_float.cpp | 6 ++++-- 6 files changed, 21 insertions(+), 26 deletions(-) diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index cce6e122..17eb998f 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -754,10 +754,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } + bool correctlyRounded = strcmp(f->name, "divide_cr") == 0; + // Init the kernels BuildKernelInfo build_info{ test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, - relaxedMode }; + test_info.programs, f->nameInCode, + relaxedMode, correctlyRounded }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp index df45a700..257e2595 100644 --- a/test_conformance/math_brute_force/common.cpp +++ b/test_conformance/math_brute_force/common.cpp @@ -102,7 +102,7 @@ void EmitEnableExtension(std::ostringstream &kernel, if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; } -std::string GetBuildOptions(bool relaxed_mode) +std::string GetBuildOptions(const BuildKernelInfo &info) { std::ostringstream options; @@ -111,16 +111,16 @@ std::string GetBuildOptions(bool relaxed_mode) options << " -cl-denorms-are-zero"; } - if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) - { - options << " -cl-fp32-correctly-rounded-divide-sqrt"; - } - - if (relaxed_mode) + if (info.relaxedMode) { options << " -cl-fast-relaxed-math"; } + if (info.correctlyRounded) + { + options << " -cl-fp32-correctly-rounded-divide-sqrt"; + } + return options.str(); } @@ -581,7 +581,7 @@ cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id, // Create the program. clProgramWrapper &program = info.programs[vector_size_index]; - auto options = GetBuildOptions(info.relaxedMode); + auto options = GetBuildOptions(info); int error = create_single_kernel_helper(gContext, &program, nullptr, sources.size(), sources.data(), nullptr, options.c_str()); diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 3f89ef6c..d7e70a71 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -84,6 +84,9 @@ struct BuildKernelInfo // Whether to build with -cl-fast-relaxed-math. bool relaxedMode; + + // Whether to build with -cl-fp32-correctly-rounded-divide-sqrt. + bool correctlyRounded; }; // Data common to all math tests. diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp index 90731ea0..408a394a 100644 --- a/test_conformance/math_brute_force/function_list.cpp +++ b/test_conformance/math_brute_force/function_list.cpp @@ -375,8 +375,8 @@ const Func functionList[] = { { NULL }, 3.0f, 0.0f, - 0.0f, - 1.0f, + 1.5f, + 1.5f, 4.0f, INFINITY, INFINITY, diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 6b72f326..008ab307 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -82,7 +82,6 @@ static int gTestFastRelaxed = 1; OpenCL 2.0 spec then it has to be changed through a command line argument. */ int gFastRelaxedDerived = 1; -static int gToggleCorrectlyRoundedDivideSqrt = 0; int gHasHalf = 0; cl_device_fp_config gHalfCapabilities = 0; int gDeviceILogb0 = 1; @@ -469,8 +468,6 @@ static int ParseArgs(int argc, const char **argv) optionFound = 1; switch (*arg) { - case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break; - case 'd': gHasDouble ^= 1; break; case 'e': gFastRelaxedDerived ^= 1; break; @@ -629,8 +626,6 @@ static void PrintUsage(void) { vlog("%s [-cglsz]: \n", appName); vlog("\toptions:\n"); - vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: " - "off)\n"); vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 " "on)\n"); vlog("\t\t-f\tToggle float precision testing. (Default: on)\n"); @@ -942,13 +937,6 @@ test_status InitCL(cl_device_id device) vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]); - if (gToggleCorrectlyRoundedDivideSqrt) - { - gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; - } - vlog("\tTesting with correctly rounded float divide and sqrt? %s\n", - no_yes[0 - != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]); vlog("\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]); vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]); diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index ee8a61b8..2761ab97 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -563,10 +563,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) INFINITY; // out of range resut from finite inputs must be numeric } + bool correctlyRounded = strcmp(f->name, "sqrt_cr") == 0; + // Init the kernels BuildKernelInfo build_info{ test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, - relaxedMode }; + test_info.programs, f->nameInCode, + relaxedMode, correctlyRounded }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info)))