fix correctly rounded behavior for math bruteforce tests (#2397)

fixes #2387 

Corrects the "correctly rounded" behavior for the math bruteforce tests.
Specifically:

* Only applies the `-cl-fp32-correctly-rounded-divide-sqrt` build option
for the `divide_cr` and `sqrt_cr` tests. The other tests do not receive
this build option. This means that there is a difference in the behavior
of the `divide` and `divide_cr` tests and the `sqrt` and `sqrt_cr`
tests, and the "correctly rounded" build option is not applied to the
fp16 or fp64 tests.
* Removes the `-c` command-line option that toggled testing of the
correctly rounded divide and square root functions, since it is no longer
needed. Instead, the test names can be used to choose whether to test the
correctly rounded functions or the non-correctly rounded functions.

Additionally:

* Relaxes the fp16 sqrt accuracy requirements to 1 ULP. This is needed
to pass this test on some of our devices. This part is still under
discussion, so I will keep this PR as a draft until it is settled.
This commit is contained in:
Ben Ashbaugh
2025-07-15 09:01:19 -07:00
committed by GitHub
parent 933874f070
commit 8d4a870059
6 changed files with 21 additions and 26 deletions

View File

@@ -754,10 +754,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
} }
bool correctlyRounded = strcmp(f->name, "divide_cr") == 0;
// Init the kernels // Init the kernels
BuildKernelInfo build_info{ test_info.threadCount, test_info.k, BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
test_info.programs, f->nameInCode, test_info.programs, f->nameInCode,
relaxedMode }; relaxedMode, correctlyRounded };
if ((error = ThreadPool_Do(BuildKernelFn, if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex, gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info))) &build_info)))

View File

@@ -102,7 +102,7 @@ void EmitEnableExtension(std::ostringstream &kernel,
if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
} }
std::string GetBuildOptions(bool relaxed_mode) std::string GetBuildOptions(const BuildKernelInfo &info)
{ {
std::ostringstream options; std::ostringstream options;
@@ -111,16 +111,16 @@ std::string GetBuildOptions(bool relaxed_mode)
options << " -cl-denorms-are-zero"; options << " -cl-denorms-are-zero";
} }
if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) if (info.relaxedMode)
{
options << " -cl-fp32-correctly-rounded-divide-sqrt";
}
if (relaxed_mode)
{ {
options << " -cl-fast-relaxed-math"; options << " -cl-fast-relaxed-math";
} }
if (info.correctlyRounded)
{
options << " -cl-fp32-correctly-rounded-divide-sqrt";
}
return options.str(); return options.str();
} }
@@ -581,7 +581,7 @@ cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
// Create the program. // Create the program.
clProgramWrapper &program = info.programs[vector_size_index]; clProgramWrapper &program = info.programs[vector_size_index];
auto options = GetBuildOptions(info.relaxedMode); auto options = GetBuildOptions(info);
int error = int error =
create_single_kernel_helper(gContext, &program, nullptr, sources.size(), create_single_kernel_helper(gContext, &program, nullptr, sources.size(),
sources.data(), nullptr, options.c_str()); sources.data(), nullptr, options.c_str());

View File

@@ -84,6 +84,9 @@ struct BuildKernelInfo
// Whether to build with -cl-fast-relaxed-math. // Whether to build with -cl-fast-relaxed-math.
bool relaxedMode; bool relaxedMode;
// Whether to build with -cl-fp32-correctly-rounded-divide-sqrt.
bool correctlyRounded;
}; };
// Data common to all math tests. // Data common to all math tests.

View File

@@ -375,8 +375,8 @@ const Func functionList[] = {
{ NULL }, { NULL },
3.0f, 3.0f,
0.0f, 0.0f,
0.0f, 1.5f,
1.0f, 1.5f,
4.0f, 4.0f,
INFINITY, INFINITY,
INFINITY, INFINITY,

View File

@@ -82,7 +82,6 @@ static int gTestFastRelaxed = 1;
OpenCL 2.0 spec then it has to be changed through a command line argument. OpenCL 2.0 spec then it has to be changed through a command line argument.
*/ */
int gFastRelaxedDerived = 1; int gFastRelaxedDerived = 1;
static int gToggleCorrectlyRoundedDivideSqrt = 0;
int gHasHalf = 0; int gHasHalf = 0;
cl_device_fp_config gHalfCapabilities = 0; cl_device_fp_config gHalfCapabilities = 0;
int gDeviceILogb0 = 1; int gDeviceILogb0 = 1;
@@ -469,8 +468,6 @@ static int ParseArgs(int argc, const char **argv)
optionFound = 1; optionFound = 1;
switch (*arg) switch (*arg)
{ {
case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
case 'd': gHasDouble ^= 1; break; case 'd': gHasDouble ^= 1; break;
case 'e': gFastRelaxedDerived ^= 1; break; case 'e': gFastRelaxedDerived ^= 1; break;
@@ -629,8 +626,6 @@ static void PrintUsage(void)
{ {
vlog("%s [-cglsz]: <optional: math function names>\n", appName); vlog("%s [-cglsz]: <optional: math function names>\n", appName);
vlog("\toptions:\n"); vlog("\toptions:\n");
vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
"off)\n");
vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 " vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
"on)\n"); "on)\n");
vlog("\t\t-f\tToggle float precision testing. (Default: on)\n"); vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
@@ -942,13 +937,6 @@ test_status InitCL(cl_device_id device)
vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n", vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
no_yes[0 no_yes[0
!= (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]); != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
if (gToggleCorrectlyRoundedDivideSqrt)
{
gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
}
vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
no_yes[0
!= (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
vlog("\tTesting with FTZ mode ON for floats? %s\n", vlog("\tTesting with FTZ mode ON for floats? %s\n",
no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]); no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]); vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);

View File

@@ -563,10 +563,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
INFINITY; // out of range resut from finite inputs must be numeric INFINITY; // out of range resut from finite inputs must be numeric
} }
bool correctlyRounded = strcmp(f->name, "sqrt_cr") == 0;
// Init the kernels // Init the kernels
BuildKernelInfo build_info{ test_info.threadCount, test_info.k, BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
test_info.programs, f->nameInCode, test_info.programs, f->nameInCode,
relaxedMode }; relaxedMode, correctlyRounded };
if ((error = ThreadPool_Do(BuildKernelFn, if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex, gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info))) &build_info)))