fix correctly rounded behavior for math bruteforce tests (#2397)

Fixes #2387.

Corrects the "correctly rounded" behavior for the math bruteforce tests. Specifically:

* Only applies the `-cl-fp32-correctly-rounded-divide-sqrt` build option for the `divide_cr` and `sqrt_cr` tests. The other tests do not receive this build option. This means that there is a difference in the behavior of the `divide` and `divide_cr` tests and the `sqrt` and `sqrt_cr` tests, and the "correctly rounded" build option is not applied to the fp16 or fp64 tests.
* Removes the `-c` command-line option that toggled testing the correctly rounded divide and square root tests, since it is no longer needed. Instead, the test names can be used to choose whether to test the correctly rounded functions or the non-correctly rounded functions.

Additionally:

* Relaxes the fp16 sqrt accuracy requirements to 1 ULP. This is needed to pass this test on some of our devices. This part is still under discussion, so I will keep this PR as a draft until it is settled.
2026-03-19 06:09:01 +00:00 · 2025-07-15 09:01:19 -07:00
parent 933874f070
commit 8d4a870059
6 changed files with 21 additions and 26 deletions
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -754,10 +754,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
    }
    bool correctlyRounded = strcmp(f->name, "divide_cr") == 0;
    // Init the kernels
    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                test_info.programs, f->nameInCode,
+                                test_info.programs,    f->nameInCode,
-                                relaxedMode };
+                                relaxedMode,           correctlyRounded };
    if ((error = ThreadPool_Do(BuildKernelFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
--- a/test_conformance/math_brute_force/common.cpp
+++ b/test_conformance/math_brute_force/common.cpp
@@ -102,7 +102,7 @@ void EmitEnableExtension(std::ostringstream &kernel,
    if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
 }
-std::string GetBuildOptions(bool relaxed_mode)
+std::string GetBuildOptions(const BuildKernelInfo &info)
 {
    std::ostringstream options;
@@ -111,16 +111,16 @@ std::string GetBuildOptions(bool relaxed_mode)
        options << " -cl-denorms-are-zero";
    }
-    if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
+    if (info.relaxedMode)
    {
        options << " -cl-fp32-correctly-rounded-divide-sqrt";
    }
    if (relaxed_mode)
    {
        options << " -cl-fast-relaxed-math";
    }
    if (info.correctlyRounded)
    {
        options << " -cl-fp32-correctly-rounded-divide-sqrt";
    }
    return options.str();
 }
@@ -581,7 +581,7 @@ cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
    // Create the program.
    clProgramWrapper &program = info.programs[vector_size_index];
-    auto options = GetBuildOptions(info.relaxedMode);
+    auto options = GetBuildOptions(info);
    int error =
        create_single_kernel_helper(gContext, &program, nullptr, sources.size(),
                                    sources.data(), nullptr, options.c_str());
--- a/test_conformance/math_brute_force/common.h
+++ b/test_conformance/math_brute_force/common.h
@@ -84,6 +84,9 @@ struct BuildKernelInfo
    // Whether to build with -cl-fast-relaxed-math.
    bool relaxedMode;
    // Whether to build with -cl-fp32-correctly-rounded-divide-sqrt.
    bool correctlyRounded;
 };
 // Data common to all math tests.
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -375,8 +375,8 @@ const Func functionList[] = {
      { NULL },
      3.0f,
      0.0f,
-      0.0f,
+      1.5f,
-      1.0f,
+      1.5f,
      4.0f,
      INFINITY,
      INFINITY,
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -82,7 +82,6 @@ static int gTestFastRelaxed = 1;
  OpenCL 2.0 spec then it has to be changed through a command line argument.
 */
 int gFastRelaxedDerived = 1;
 static int gToggleCorrectlyRoundedDivideSqrt = 0;
 int gHasHalf = 0;
 cl_device_fp_config gHalfCapabilities = 0;
 int gDeviceILogb0 = 1;
@@ -469,8 +468,6 @@ static int ParseArgs(int argc, const char **argv)
                optionFound = 1;
                switch (*arg)
                {
                    case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
                    case 'd': gHasDouble ^= 1; break;
                    case 'e': gFastRelaxedDerived ^= 1; break;
@@ -629,8 +626,6 @@ static void PrintUsage(void)
 {
    vlog("%s [-cglsz]: <optional: math function names>\n", appName);
    vlog("\toptions:\n");
    vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
         "off)\n");
    vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
         "on)\n");
    vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
@@ -942,13 +937,6 @@ test_status InitCL(cl_device_id device)
    vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
         no_yes[0
                != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
    if (gToggleCorrectlyRoundedDivideSqrt)
    {
        gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
    }
    vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
         no_yes[0
                != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
    vlog("\tTesting with FTZ mode ON for floats? %s\n",
         no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
    vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -563,10 +563,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            INFINITY; // out of range resut from finite inputs must be numeric
    }
    bool correctlyRounded = strcmp(f->name, "sqrt_cr") == 0;
    // Init the kernels
    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                test_info.programs, f->nameInCode,
+                                test_info.programs,    f->nameInCode,
-                                relaxedMode };
+                                relaxedMode,           correctlyRounded };
    if ((error = ThreadPool_Do(BuildKernelFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))