From 8d4a8700597dccbd81e9292620269854196b8a81 Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh <ben.ashbaugh@intel.com>
Date: Tue, 15 Jul 2025 09:01:19 -0700
Subject: [PATCH] fix correctly rounded behavior for math bruteforce tests
 (#2397)

fixes #2387

Corrects the "correctly rounded" behavior for the math bruteforce tests.
Specifically:

* Only applies the `-cl-fp32-correctly-rounded-divide-sqrt` build option
for the `divide_cr` and `sqrt_cr` tests. The other tests do not receive
this build option. This means that there is a difference in the behavior
of the `divide` and `divide_cr` tests and the `sqrt` and `sqrt_cr`
tests, and the "correctly rounded" build option is not applied to the
fp16 or fp64 tests.
* Removes the build option to toggle testing the correctly rounded
divide and square root tests since it no longer needed. Instead, the
test names can be used to choose whether to test the correctly rounded
functions or the non-correctly rounded functions.

Additionally:

* Relaxes the fp16 sqrt accuracy requirements to 1 ULP. This is needed
to pass this test on some of our devices. This part is still under
discussion, so I will keep this PR as a draft until it is settled.
---
 .../math_brute_force/binary_operator_float.cpp   |  6 ++++--
 test_conformance/math_brute_force/common.cpp     | 16 ++++++++--------
 test_conformance/math_brute_force/common.h       |  3 +++
 .../math_brute_force/function_list.cpp           |  4 ++--
 test_conformance/math_brute_force/main.cpp       | 12 ------------
 .../math_brute_force/unary_float.cpp             |  6 ++++--
 6 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index cce6e122..17eb998f 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -754,10 +754,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
+    bool correctlyRounded = strcmp(f->name, "divide_cr") == 0;
+
     // Init the kernels
     BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                test_info.programs, f->nameInCode,
-                                relaxedMode };
+                                test_info.programs,    f->nameInCode,
+                                relaxedMode,           correctlyRounded };
     if ((error = ThreadPool_Do(BuildKernelFn,
                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                &build_info)))
diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp
index df45a700..257e2595 100644
--- a/test_conformance/math_brute_force/common.cpp
+++ b/test_conformance/math_brute_force/common.cpp
@@ -102,7 +102,7 @@ void EmitEnableExtension(std::ostringstream &kernel,
     if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
 }
 
-std::string GetBuildOptions(bool relaxed_mode)
+std::string GetBuildOptions(const BuildKernelInfo &info)
 {
     std::ostringstream options;
 
@@ -111,16 +111,16 @@ std::string GetBuildOptions(bool relaxed_mode)
         options << " -cl-denorms-are-zero";
     }
 
-    if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
-    {
-        options << " -cl-fp32-correctly-rounded-divide-sqrt";
-    }
-
-    if (relaxed_mode)
+    if (info.relaxedMode)
     {
         options << " -cl-fast-relaxed-math";
     }
 
+    if (info.correctlyRounded)
+    {
+        options << " -cl-fp32-correctly-rounded-divide-sqrt";
+    }
+
     return options.str();
 }
 
@@ -581,7 +581,7 @@ cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
 
     // Create the program.
     clProgramWrapper &program = info.programs[vector_size_index];
-    auto options = GetBuildOptions(info.relaxedMode);
+    auto options = GetBuildOptions(info);
     int error =
         create_single_kernel_helper(gContext, &program, nullptr, sources.size(),
                                     sources.data(), nullptr, options.c_str());
diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h
index 3f89ef6c..d7e70a71 100644
--- a/test_conformance/math_brute_force/common.h
+++ b/test_conformance/math_brute_force/common.h
@@ -84,6 +84,9 @@ struct BuildKernelInfo
 
     // Whether to build with -cl-fast-relaxed-math.
     bool relaxedMode;
+
+    // Whether to build with -cl-fp32-correctly-rounded-divide-sqrt.
+    bool correctlyRounded;
 };
 
 // Data common to all math tests.
diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
index 90731ea0..408a394a 100644
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -375,8 +375,8 @@ const Func functionList[] = {
       { NULL },
       3.0f,
       0.0f,
-      0.0f,
-      1.0f,
+      1.5f,
+      1.5f,
       4.0f,
       INFINITY,
       INFINITY,
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index 6b72f326..008ab307 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -82,7 +82,6 @@ static int gTestFastRelaxed = 1;
   OpenCL 2.0 spec then it has to be changed through a command line argument.
 */
 int gFastRelaxedDerived = 1;
-static int gToggleCorrectlyRoundedDivideSqrt = 0;
 int gHasHalf = 0;
 cl_device_fp_config gHalfCapabilities = 0;
 int gDeviceILogb0 = 1;
@@ -469,8 +468,6 @@ static int ParseArgs(int argc, const char **argv)
                 optionFound = 1;
                 switch (*arg)
                 {
-                    case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
-
                     case 'd': gHasDouble ^= 1; break;
 
                     case 'e': gFastRelaxedDerived ^= 1; break;
@@ -629,8 +626,6 @@ static void PrintUsage(void)
 {
     vlog("%s [-cglsz]: <optional: math function names>\n", appName);
     vlog("\toptions:\n");
-    vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
-         "off)\n");
     vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
          "on)\n");
     vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
@@ -942,13 +937,6 @@ test_status InitCL(cl_device_id device)
     vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
          no_yes[0
                 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
-    if (gToggleCorrectlyRoundedDivideSqrt)
-    {
-        gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
-    }
-    vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
-         no_yes[0
-                != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
     vlog("\tTesting with FTZ mode ON for floats? %s\n",
          no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
     vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
index ee8a61b8..2761ab97 100644
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -563,10 +563,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             INFINITY; // out of range resut from finite inputs must be numeric
     }
 
+    bool correctlyRounded = strcmp(f->name, "sqrt_cr") == 0;
+
     // Init the kernels
     BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                test_info.programs, f->nameInCode,
-                                relaxedMode };
+                                test_info.programs,    f->nameInCode,
+                                relaxedMode,           correctlyRounded };
     if ((error = ThreadPool_Do(BuildKernelFn,
                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                &build_info)))