From 21ee05ecafde275886a2fd57499cb4000b446dd7 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 13 Aug 2024 18:18:33 +0200 Subject: [PATCH] math_brute_force: stop relying on volatile for IsTininessDetectedBeforeRounding (#2038) This makes it literally impossible for drivers to constant fold the IsTininessDetectedBeforeRounding kernel. Sure, drivers arguably should respect volatile here, but I'm not convinced this is actually required by the spec in a very strict sense, because here there are no side-effects possible in the first place. And as far as I know, constant folding is allowed to give different results than an actual GPU calculation would. In any case, passing the constants via kernel arguments makes this detection more reliable and one doesn't have to wonder why the fma test is failing. Side note: this was the last bug (known as of today) I had to fix in order to be able to make a CL CTS submission for Apple Silicon devices. --- test_conformance/math_brute_force/main.cpp | 23 +++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index d939984e..e1ea4c25 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -1043,13 +1043,14 @@ int IsTininessDetectedBeforeRounding(void) { int error; const char *kernelSource = - R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out ) + R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out, float a, float b ) { - volatile float a = 0x1.000002p-126f; - volatile float b = 0x1.fffffcp-1f; out[0] = a * b; // product is 0x1.fffffffffff8p-127 })"; + float a = 0x1.000002p-126f; + float b = 0x1.fffffcp-1f; + clProgramWrapper query; clKernelWrapper kernel; error = @@ -1073,6 +1074,22 @@ int IsTininessDetectedBeforeRounding(void) return error; } + if ((error = clSetKernelArg(kernel, 1, sizeof(a), &a))) + { + 
vlog_error("Error: Unable to set kernel arg to detect how tininess is " + "detected for the device. Err = %d", + error); + return error; + } + + if ((error = clSetKernelArg(kernel, 2, sizeof(b), &b))) + { + vlog_error("Error: Unable to set kernel arg to detect how tininess is " + "detected for the device. Err = %d", + error); + return error; + } + size_t dim = 1; if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0, NULL, NULL)))