Files
OpenCL-CTS/test_conformance/math_brute_force/mad_double.cpp
Sven van Haastregt d9a938b698 Factor out GetTernaryKernel (#1511)
Use a common function to create the kernel source code for testing
3-argument math builtins.  This reduces code duplication.  1-argument
and 2-argument math kernel construction will be factored out in future
work.

Change the kernels to use preprocessor defines for argument types and
undef values, to make the CTS code easier to read.

Co-authored-by: Marco Antognini <marco.antognini@arm.com>
Signed-off-by: Marco Antognini <marco.antognini@arm.com>
Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
Co-authored-by: Marco Antognini <marco.antognini@arm.com>
2022-10-04 09:28:29 -07:00

224 lines
7.4 KiB
C++

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
namespace {
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
auto kernel_name = GetKernelName(vectorSize);
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
ParameterType::Double, ParameterType::Double,
ParameterType::Double, vectorSize);
std::array<const char *, 1> sources{ source.c_str() };
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
relaxedMode);
}
struct BuildKernelInfo2
{
cl_kernel *kernels;
Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
cl_uint vectorSize = gMinVectorSizeIndex + job_id;
return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
&(info->programs[vectorSize]), info->relaxedMode);
}
} // anonymous namespace
int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
double maxErrorVal = 0.0f;
double maxErrorVal2 = 0.0f;
double maxErrorVal3 = 0.0f;
uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
// Init the kernels
{
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
return error;
}
for (uint64_t i = 0; i < (1ULL << 32); i += step)
{
// Init input array
double *p = (double *)gIn;
double *p2 = (double *)gIn2;
double *p3 = (double *)gIn3;
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
{
p[j] = DoubleFromUInt32(genrand_int32(d));
p2[j] = DoubleFromUInt32(genrand_int32(d));
p3[j] = DoubleFromUInt32(genrand_int32(d));
}
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
BUFFER_SIZE, gIn, 0, NULL, NULL)))
{
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
return error;
}
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
{
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
return error;
}
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
BUFFER_SIZE, gIn3, 0, NULL, NULL)))
{
vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
return error;
}
// write garbage into output arrays
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{
uint32_t pattern = 0xffffdead;
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
if ((error =
clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
{
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
error, j);
goto exit;
}
}
// Run the kernels
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
&gOutBuffer[j])))
{
LogBuildError(programs[j]);
goto exit;
}
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
&gInBuffer)))
{
LogBuildError(programs[j]);
goto exit;
}
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
&gInBuffer2)))
{
LogBuildError(programs[j]);
goto exit;
}
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
&gInBuffer3)))
{
LogBuildError(programs[j]);
goto exit;
}
if ((error =
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
&localCount, NULL, 0, NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel\n");
goto exit;
}
}
// Get that moving
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
// Calculate the correctly rounded reference result
double *r = (double *)gOut_Ref;
double *s = (double *)gIn;
double *s2 = (double *)gIn2;
double *s3 = (double *)gIn3;
for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
// Read the data back
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{
if ((error =
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
{
vlog_error("ReadArray failed %d\n", error);
goto exit;
}
}
if (gSkipCorrectnessTesting) break;
// Verify data -- No verification possible.
// MAD is a random number generator.
if (0 == (i & 0x0fffffff))
{
vlog(".");
fflush(stdout);
}
}
if (!gSkipCorrectnessTesting)
{
if (gWimpyMode)
vlog("Wimp pass");
else
vlog("passed");
vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
maxErrorVal3);
}
vlog("\n");
exit:
// Release
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
}
return error;
}