mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Fp16 math bruteforce staging (#1863)
* Enable fp16 in math bruteforce * Added modernization of remaining half tests for consistency (issue #142, bruteforce) * Added kernel types related corrections * Added more fixes and general cleanup * Corrected ULP values for half tests (issue #142, bruteforce) * Corrected presubmit check for clang format * Added support for ternary, unary_two_result and unary_two_result_i tests for cl_half (issue #142, bruteforce) * Added missing condition due to vendor's review * code format correction * Added check for lack of support for denormals in binary_half scenario * Corrected procedure to compute nextafter cl_half for flush-to-zero mode * Added correction for external check of reference value for nextafter test * Added correction due to code review request * Changed quantity of tests performed for half in unary and macro_unary procedures from basic * Added corrections related to code review: -added binary_operator_half.cpp and binary_two_results_i_half.cpp -address sanitizer errors fixed -extending list of special half values -removed unnecessary relaxed math references in half tests -corrected conditions to verify ulp narrowing of computation results -several refactoring and cosmetics corrections * Print format correction due to failed CI check * Corrected bug found in code review (fp16 bruteforce) * Corrections related to code review (cl_khr_fp16 support according to #142) -gHostFill missing support added -special half values array extended -cosmetics and unifying * clang format applied * consistency correction * more consistency corrections for cl_fp16_khr supported tests * Corrections related to code review (bureforce #142) * Correction for i_unary_half test capacity * Corrections related to capacity of cl_khr_fp16 tests in bruteforce (#142) --------- Co-authored-by: Wawiorko, Grzegorz <grzegorz.wawiorko@intel.com>
This commit is contained in:
@@ -3,24 +3,32 @@ set(MODULE_NAME BRUTEFORCE)
|
||||
set(${MODULE_NAME}_SOURCES
|
||||
binary_double.cpp
|
||||
binary_float.cpp
|
||||
binary_half.cpp
|
||||
binary_i_double.cpp
|
||||
binary_i_float.cpp
|
||||
binary_i_half.cpp
|
||||
binary_operator_double.cpp
|
||||
binary_operator_float.cpp
|
||||
binary_operator_half.cpp
|
||||
binary_two_results_i_double.cpp
|
||||
binary_two_results_i_float.cpp
|
||||
binary_two_results_i_half.cpp
|
||||
common.cpp
|
||||
common.h
|
||||
function_list.cpp
|
||||
function_list.h
|
||||
i_unary_double.cpp
|
||||
i_unary_float.cpp
|
||||
i_unary_half.cpp
|
||||
macro_binary_double.cpp
|
||||
macro_binary_float.cpp
|
||||
macro_binary_half.cpp
|
||||
macro_unary_double.cpp
|
||||
macro_unary_float.cpp
|
||||
macro_unary_half.cpp
|
||||
mad_double.cpp
|
||||
mad_float.cpp
|
||||
mad_half.cpp
|
||||
main.cpp
|
||||
reference_math.cpp
|
||||
reference_math.h
|
||||
@@ -28,15 +36,20 @@ set(${MODULE_NAME}_SOURCES
|
||||
sleep.h
|
||||
ternary_double.cpp
|
||||
ternary_float.cpp
|
||||
ternary_half.cpp
|
||||
test_functions.h
|
||||
unary_double.cpp
|
||||
unary_float.cpp
|
||||
unary_half.cpp
|
||||
unary_two_results_double.cpp
|
||||
unary_two_results_float.cpp
|
||||
unary_two_results_half.cpp
|
||||
unary_two_results_i_double.cpp
|
||||
unary_two_results_i_float.cpp
|
||||
unary_two_results_i_half.cpp
|
||||
unary_u_double.cpp
|
||||
unary_u_float.cpp
|
||||
unary_u_half.cpp
|
||||
utility.cpp
|
||||
utility.h
|
||||
)
|
||||
|
||||
784
test_conformance/math_brute_force/binary_half.cpp
Normal file
784
test_conformance/math_brute_force/binary_half.cpp
Normal file
@@ -0,0 +1,784 @@
|
||||
//
|
||||
// Copyright (c) 2023 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "harness/errorHelpers.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
#include "reference_math.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetBinaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||
ParameterType::Half, ParameterType::Half,
|
||||
vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
struct ThreadInfo
|
||||
{
|
||||
clMemWrapper inBuf; // input buffer for the thread
|
||||
clMemWrapper inBuf2; // input buffer for the thread
|
||||
clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
float maxError; // max error value. Init to 0.
|
||||
double
|
||||
maxErrorValue; // position of the max error value (param 1). Init to 0.
|
||||
double maxErrorValue2; // position of the max error value (param 2). Init
|
||||
// to 0.
|
||||
MTdataHolder d;
|
||||
|
||||
clCommandQueueWrapper
|
||||
tQueue; // per thread command queue to improve performance
|
||||
};
|
||||
|
||||
struct TestInfoBase
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
float ulps; // max_allowed ulps
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
|
||||
int isFDim;
|
||||
int skipNanInf;
|
||||
int isNextafter;
|
||||
};
|
||||
|
||||
struct TestInfo : public TestInfoBase
|
||||
{
|
||||
TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
|
||||
|
||||
// Array of thread specific information
|
||||
std::vector<ThreadInfo> tinfo;
|
||||
|
||||
// Programs for various vector sizes.
|
||||
Programs programs;
|
||||
|
||||
// Thread-specific kernels for each vector size:
|
||||
// k[vector_size][thread_id]
|
||||
KernelMatrix k;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
const cl_half specialValuesHalf[] = {
|
||||
0xffff, 0x0000, 0x0001, 0x7c00, /*INFINITY*/
|
||||
0xfc00, /*-INFINITY*/
|
||||
0x8000, /*-0*/
|
||||
0x7bff, /*HALF_MAX*/
|
||||
0x0400, /*HALF_MIN*/
|
||||
0x03ff, /* Largest denormal */
|
||||
0x3c00, /* 1 */
|
||||
0xbc00, /* -1 */
|
||||
0x3555, /*nearest value to 1/3*/
|
||||
0x3bff, /*largest number less than one*/
|
||||
0xc000, /* -2 */
|
||||
0xfbff, /* -HALF_MAX */
|
||||
0x8400, /* -HALF_MIN */
|
||||
0x4248, /* M_PI_H */
|
||||
0xc248, /* -M_PI_H */
|
||||
0xbbff, /* Largest negative fraction */
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
|
||||
|
||||
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
TestInfo *job = (TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
size_t buffer_size = buffer_elements * sizeof(cl_half);
|
||||
cl_uint base = job_id * (cl_uint)job->step;
|
||||
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||
float ulps = job->ulps;
|
||||
fptr func = job->f->func;
|
||||
int ftz = job->ftz;
|
||||
MTdata d = tinfo->d;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
|
||||
int isFDim = job->isFDim;
|
||||
int skipNanInf = job->skipNanInf;
|
||||
int isNextafter = job->isNextafter;
|
||||
cl_ushort *t;
|
||||
cl_half *r;
|
||||
std::vector<float> s(0), s2(0);
|
||||
cl_uint j = 0;
|
||||
|
||||
RoundingMode oldRoundMode;
|
||||
cl_int copysign_test = 0;
|
||||
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_ushort *out[VECTOR_SIZE_COUNT];
|
||||
|
||||
if (gHostFill)
|
||||
{
|
||||
// start the map of the output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
buffer_size, 0, NULL, e + j, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||
}
|
||||
|
||||
// Init input array
|
||||
cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
|
||||
cl_ushort *p2 = (cl_ushort *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
int totalSpecialValueCount =
|
||||
specialValuesHalfCount * specialValuesHalfCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
{ // test edge cases
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesHalfCount;
|
||||
y = (job_id * buffer_elements) / specialValuesHalfCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
{
|
||||
p[j] = specialValuesHalf[x];
|
||||
p2[j] = specialValuesHalf[y];
|
||||
if (++x >= specialValuesHalfCount)
|
||||
{
|
||||
x = 0;
|
||||
y++;
|
||||
if (y >= specialValuesHalfCount) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
{
|
||||
p[j] = (cl_ushort)genrand_int32(d);
|
||||
p2[j] = (cl_ushort)genrand_int32(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
buffer_size, p, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
|
||||
buffer_size, p2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if (gHostFill)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
{
|
||||
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clReleaseEvent(e[j])))
|
||||
{
|
||||
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry
|
||||
// over
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||
&pattern, sizeof(pattern), 0,
|
||||
buffer_size, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||
}
|
||||
|
||||
// run the kernel
|
||||
size_t vectorCount =
|
||||
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||
// own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||
&tinfo->outBuf[j])))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||
&tinfo->inBuf)))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
|
||||
&tinfo->inBuf2)))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||
&vectorCount, NULL, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||
|
||||
if (gSkipCorrectnessTesting)
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
FPU_mode_type oldMode;
|
||||
oldRoundMode = kRoundToNearestEven;
|
||||
if (isFDim)
|
||||
{
|
||||
// Calculate the correctly rounded reference result
|
||||
memset(&oldMode, 0, sizeof(oldMode));
|
||||
if (ftz) ForceFTZ(&oldMode);
|
||||
|
||||
// Set the rounding mode to match the device
|
||||
if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
}
|
||||
|
||||
if (!strcmp(name, "copysign")) copysign_test = 1;
|
||||
|
||||
#define ref_func(s, s2) (copysign_test ? func.f_ff_f(s, s2) : func.f_ff(s, s2))
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
|
||||
t = (cl_ushort *)r;
|
||||
s.resize(buffer_elements);
|
||||
s2.resize(buffer_elements);
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
s[j] = cl_half_to_float(p[j]);
|
||||
s2[j] = cl_half_to_float(p2[j]);
|
||||
if (isNextafter)
|
||||
r[j] = cl_half_from_float(reference_nextafterh(s[j], s2[j]),
|
||||
CL_HALF_RTE);
|
||||
else
|
||||
r[j] = cl_half_from_float(ref_func(s[j], s2[j]), CL_HALF_RTE);
|
||||
}
|
||||
|
||||
if (isFDim && ftz) RestoreFPState(&oldMode);
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is
|
||||
// an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
|
||||
buffer_size, 0, NULL, NULL, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify data
|
||||
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_ushort *q = out[k];
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (t[j] != q[j])
|
||||
{
|
||||
double correct;
|
||||
if (isNextafter)
|
||||
correct = reference_nextafterh(s[j], s2[j]);
|
||||
else
|
||||
correct = ref_func(s[j], s2[j]);
|
||||
|
||||
float test = cl_half_to_float(q[j]);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or
|
||||
// output is a infinity or NaN or overflow
|
||||
if (skipNanInf)
|
||||
{
|
||||
// Note: no double rounding here. Reference functions
|
||||
// calculate in single precision.
|
||||
if (IsFloatInfinity(correct) || IsFloatNaN(correct)
|
||||
|| IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j])
|
||||
|| IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
|
||||
continue;
|
||||
}
|
||||
float err = Ulp_Error_Half(q[j], correct);
|
||||
int fail = !(fabsf(err) <= ulps);
|
||||
|
||||
if (fail && ftz)
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
if (IsHalfResultSubnormal(correct, ulps))
|
||||
{
|
||||
if (isNextafter)
|
||||
{
|
||||
correct = reference_nextafterh(s[j], s2[j], false);
|
||||
err = Ulp_Error_Half(q[j], correct);
|
||||
fail = !(fabsf(err) <= ulps);
|
||||
}
|
||||
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
if (IsHalfSubnormal(p[j]))
|
||||
{
|
||||
double correct2, correct3;
|
||||
float err2, err3;
|
||||
if (isNextafter)
|
||||
{
|
||||
correct2 = reference_nextafterh(0.0, s2[j]);
|
||||
correct3 = reference_nextafterh(-0.0, s2[j]);
|
||||
}
|
||||
else
|
||||
{
|
||||
correct2 = ref_func(0.0, s2[j]);
|
||||
correct3 = ref_func(-0.0, s2[j]);
|
||||
}
|
||||
if (skipNanInf)
|
||||
{
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsFloatInfinity(correct2)
|
||||
|| IsFloatNaN(correct2)
|
||||
|| IsFloatInfinity(correct3)
|
||||
|| IsFloatNaN(correct3))
|
||||
continue;
|
||||
}
|
||||
|
||||
auto check_error = [&]() {
|
||||
err2 = Ulp_Error_Half(q[j], correct2);
|
||||
err3 = Ulp_Error_Half(q[j], correct3);
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= ulps))
|
||||
&& (!(fabsf(err3) <= ulps)));
|
||||
};
|
||||
check_error();
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct2, ulps)
|
||||
|| IsHalfResultSubnormal(correct3, ulps))
|
||||
{
|
||||
if (fail && isNextafter)
|
||||
{
|
||||
correct2 =
|
||||
reference_nextafterh(0.0, s2[j], false);
|
||||
correct3 =
|
||||
reference_nextafterh(-0.0, s2[j], false);
|
||||
check_error();
|
||||
}
|
||||
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// allow to omit denorm values for platforms with no
|
||||
// denorm support for nextafter
|
||||
if (fail && (isNextafter)
|
||||
&& (correct <= cl_half_to_float(0x3FF))
|
||||
&& (correct >= cl_half_to_float(0x83FF)))
|
||||
{
|
||||
fail = fail && (q[j] != p[j]);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// try with both args as zero
|
||||
if (IsHalfSubnormal(p2[j]))
|
||||
{
|
||||
double correct4, correct5;
|
||||
float err4, err5;
|
||||
|
||||
if (isNextafter)
|
||||
{
|
||||
correct2 = reference_nextafterh(0.0, 0.0);
|
||||
correct3 = reference_nextafterh(-0.0, 0.0);
|
||||
correct4 = reference_nextafterh(0.0, -0.0);
|
||||
correct5 = reference_nextafterh(-0.0, -0.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
correct2 = ref_func(0.0, 0.0);
|
||||
correct3 = ref_func(-0.0, 0.0);
|
||||
correct4 = ref_func(0.0, -0.0);
|
||||
correct5 = ref_func(-0.0, -0.0);
|
||||
}
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if
|
||||
// an input or output is a infinity or NaN or
|
||||
// overflow
|
||||
if (skipNanInf)
|
||||
{
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsFloatInfinity(correct2)
|
||||
|| IsFloatNaN(correct2)
|
||||
|| IsFloatInfinity(correct3)
|
||||
|| IsFloatNaN(correct3)
|
||||
|| IsFloatInfinity(correct4)
|
||||
|| IsFloatNaN(correct4)
|
||||
|| IsFloatInfinity(correct5)
|
||||
|| IsFloatNaN(correct5))
|
||||
continue;
|
||||
}
|
||||
|
||||
err2 = Ulp_Error_Half(q[j], correct2);
|
||||
err3 = Ulp_Error_Half(q[j], correct3);
|
||||
err4 = Ulp_Error_Half(q[j], correct4);
|
||||
err5 = Ulp_Error_Half(q[j], correct5);
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= ulps))
|
||||
&& (!(fabsf(err3) <= ulps))
|
||||
&& (!(fabsf(err4) <= ulps))
|
||||
&& (!(fabsf(err5) <= ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct2, ulps)
|
||||
|| IsHalfResultSubnormal(correct3, ulps)
|
||||
|| IsHalfResultSubnormal(correct4, ulps)
|
||||
|| IsHalfResultSubnormal(correct5, ulps))
|
||||
{
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// allow to omit denorm values for platforms with no
|
||||
// denorm support for nextafter
|
||||
if (fail && (isNextafter)
|
||||
&& (correct <= cl_half_to_float(0x3FF))
|
||||
&& (correct >= cl_half_to_float(0x83FF)))
|
||||
{
|
||||
fail = fail && (q[j] != p2[j]);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (IsHalfSubnormal(p2[j]))
|
||||
{
|
||||
double correct2, correct3;
|
||||
float err2, err3;
|
||||
|
||||
if (isNextafter)
|
||||
{
|
||||
correct2 = reference_nextafterh(s[j], 0.0);
|
||||
correct3 = reference_nextafterh(s[j], -0.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
correct2 = ref_func(s[j], 0.0);
|
||||
correct3 = ref_func(s[j], -0.0);
|
||||
}
|
||||
|
||||
if (skipNanInf)
|
||||
{
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsFloatInfinity(correct) || IsFloatNaN(correct)
|
||||
|| IsFloatInfinity(correct2)
|
||||
|| IsFloatNaN(correct2))
|
||||
continue;
|
||||
}
|
||||
|
||||
auto check_error = [&]() {
|
||||
err2 = Ulp_Error_Half(q[j], correct2);
|
||||
err3 = Ulp_Error_Half(q[j], correct3);
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= ulps))
|
||||
&& (!(fabsf(err3) <= ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
};
|
||||
check_error();
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct2, ulps)
|
||||
|| IsHalfResultSubnormal(correct3, ulps))
|
||||
{
|
||||
if (fail && isNextafter)
|
||||
{
|
||||
correct2 =
|
||||
reference_nextafterh(s[j], 0.0, false);
|
||||
correct3 =
|
||||
reference_nextafterh(s[j], -0.0, false);
|
||||
check_error();
|
||||
}
|
||||
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// allow to omit denorm values for platforms with no
|
||||
// denorm support for nextafter
|
||||
if (fail && (isNextafter)
|
||||
&& (correct <= cl_half_to_float(0x3FF))
|
||||
&& (correct >= cl_half_to_float(0x83FF)))
|
||||
{
|
||||
fail = fail && (q[j] != p2[j]);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fabsf(err) > tinfo->maxError)
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if (fail)
|
||||
{
|
||||
vlog_error("\nERROR: %s%s: %f ulp error at {%a (0x%04x), "
|
||||
"%a (0x%04x)}\nExpected: %a (half 0x%04x) "
|
||||
"\nActual: %a (half 0x%04x) at index: %u\n",
|
||||
name, sizeNames[k], err, s[j], p[j], s2[j],
|
||||
p2[j], cl_half_to_float(r[j]), r[j], test, q[j],
|
||||
j);
|
||||
error = -1;
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isFDim && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||
j, error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||
|
||||
if (0 == (base & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
|
||||
"ThreadCount:%2u\n",
|
||||
base, job->step, job->scale, buffer_elements, job->ulps,
|
||||
job->threadCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Half_Half_Half_common(const Func *f, MTdata d, int isNextafter,
|
||||
bool relaxedMode)
|
||||
{
|
||||
TestInfoBase test_info_base;
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
// Init test_info
|
||||
memset(&test_info_base, 0, sizeof(test_info_base));
|
||||
TestInfo test_info(test_info_base);
|
||||
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_half));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->half_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
|
||||
test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
|
||||
test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
|
||||
test_info.isNextafter = isNextafter;
|
||||
|
||||
test_info.tinfo.resize(test_info.threadCount);
|
||||
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = { i * test_info.subBufferSize
|
||||
* sizeof(cl_half),
|
||||
test_info.subBufferSize * sizeof(cl_half) };
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error(
|
||||
"Error: Unable to create sub-buffer of gOutBuffer[%d] "
|
||||
"for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
return error;
|
||||
}
|
||||
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode };
|
||||
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info);
|
||||
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||
}
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
int TestFunc_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
return TestFunc_Half_Half_Half_common(f, d, 0, relaxedMode);
|
||||
}
|
||||
|
||||
int TestFunc_Half_Half_Half_nextafter(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
return TestFunc_Half_Half_Half_common(f, d, 1, relaxedMode);
|
||||
}
|
||||
@@ -193,16 +193,14 @@ const double specialValues[] = {
|
||||
+0.0,
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
constexpr size_t specialValuesCount = ARRAY_SIZE(specialValues);
|
||||
|
||||
const int specialValuesInt[] = {
|
||||
0, 1, 2, 3, 1022, 1023, 1024, INT_MIN,
|
||||
INT_MAX, -1, -2, -3, -1022, -1023, -11024, -INT_MAX,
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesIntCount =
|
||||
sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
|
||||
constexpr size_t specialValuesIntCount = ARRAY_SIZE(specialValuesInt);
|
||||
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
|
||||
@@ -184,8 +184,7 @@ const float specialValues[] = {
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
constexpr size_t specialValuesCount = ARRAY_SIZE(specialValues);
|
||||
|
||||
const int specialValuesInt[] = {
|
||||
0, 1, 2, 3, 126, 127,
|
||||
@@ -194,9 +193,7 @@ const int specialValuesInt[] = {
|
||||
-0x04000001, -1465264071, -1488522147,
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesIntCount =
|
||||
sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
|
||||
|
||||
constexpr size_t specialValuesIntCount = ARRAY_SIZE(specialValuesInt);
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
TestInfo *job = (TestInfo *)data;
|
||||
|
||||
548
test_conformance/math_brute_force/binary_i_half.cpp
Normal file
548
test_conformance/math_brute_force/binary_i_half.cpp
Normal file
@@ -0,0 +1,548 @@
|
||||
//
|
||||
// Copyright (c) 2023 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetBinaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||
ParameterType::Half, ParameterType::Int,
|
||||
vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
{
|
||||
clMemWrapper inBuf; // input buffer for the thread
|
||||
clMemWrapper inBuf2; // input buffer for the thread
|
||||
clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
float maxError; // max error value. Init to 0.
|
||||
double
|
||||
maxErrorValue; // position of the max error value (param 1). Init to 0.
|
||||
cl_int maxErrorValue2; // position of the max error value (param 2). Init
|
||||
// to 0.
|
||||
MTdataHolder d;
|
||||
clCommandQueueWrapper
|
||||
tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
|
||||
struct TestInfoBase
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
float ulps; // max_allowed ulps
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
};
|
||||
|
||||
struct TestInfo : public TestInfoBase
|
||||
{
|
||||
TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
|
||||
|
||||
// Array of thread specific information
|
||||
std::vector<ThreadInfo> tinfo;
|
||||
|
||||
// Programs for various vector sizes.
|
||||
Programs programs;
|
||||
|
||||
// Thread-specific kernels for each vector size:
|
||||
// k[vector_size][thread_id]
|
||||
KernelMatrix k;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
const cl_half specialValuesHalf[] = {
|
||||
0xffff, 0x0000, 0x0001, 0x7c00, /*INFINITY*/
|
||||
0xfc00, /*-INFINITY*/
|
||||
0x8000, /*-0*/
|
||||
0x7bff, /*HALF_MAX*/
|
||||
0x0400, /*HALF_MIN*/
|
||||
0x03ff, /* Largest denormal */
|
||||
0x3c00, /* 1 */
|
||||
0xbc00, /* -1 */
|
||||
0x3555, /*nearest value to 1/3*/
|
||||
0x3bff, /*largest number less than one*/
|
||||
0xc000, /* -2 */
|
||||
0xfbff, /* -HALF_MAX */
|
||||
0x8400, /* -HALF_MIN */
|
||||
0x4248, /* M_PI_H */
|
||||
0xc248, /* -M_PI_H */
|
||||
0xbbff, /* Largest negative fraction */
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
|
||||
|
||||
const int specialValuesInt3[] = { 0, 1, 2, 3, 1022, 1023,
|
||||
1024, INT_MIN, INT_MAX, -1, -2, -3,
|
||||
-1022, -1023, -11024, -INT_MAX };
|
||||
size_t specialValuesInt3Count = ARRAY_SIZE(specialValuesInt3);
|
||||
|
||||
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
TestInfo *job = (TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
cl_uint base = job_id * (cl_uint)job->step;
|
||||
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||
float ulps = job->ulps;
|
||||
fptr func = job->f->func;
|
||||
int ftz = job->ftz;
|
||||
MTdata d = tinfo->d;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
cl_ushort *t;
|
||||
cl_half *r;
|
||||
std::vector<float> s;
|
||||
cl_int *s2;
|
||||
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_ushort *out[VECTOR_SIZE_COUNT];
|
||||
|
||||
if (gHostFill)
|
||||
{
|
||||
// start the map of the output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
buffer_elements * sizeof(cl_ushort), 0, NULL, e + j, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||
}
|
||||
|
||||
// Init input array
|
||||
cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
|
||||
cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
int totalSpecialValueCount =
|
||||
specialValuesHalfCount * specialValuesInt3Count;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
if (job_id <= (cl_uint)indx)
|
||||
{ // test edge cases
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesHalfCount;
|
||||
y = (job_id * buffer_elements) / specialValuesHalfCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
{
|
||||
p[j] = specialValuesHalf[x];
|
||||
p2[j] = specialValuesInt3[y];
|
||||
if (++x >= specialValuesHalfCount)
|
||||
{
|
||||
x = 0;
|
||||
y++;
|
||||
if (y >= specialValuesInt3Count) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
{
|
||||
p[j] = (cl_ushort)genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
buffer_elements * sizeof(cl_half), p, 0,
|
||||
NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
|
||||
buffer_elements * sizeof(cl_int), p2, 0,
|
||||
NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if (gHostFill)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
{
|
||||
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clReleaseEvent(e[j])))
|
||||
{
|
||||
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry
|
||||
// over
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(out[j], &pattern,
|
||||
buffer_elements * sizeof(cl_half));
|
||||
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], &pattern, sizeof(pattern), 0,
|
||||
buffer_elements * sizeof(cl_half), 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||
}
|
||||
|
||||
// run the kernel
|
||||
size_t vectorCount =
|
||||
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||
// own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||
&tinfo->outBuf[j])))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||
&tinfo->inBuf)))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
|
||||
&tinfo->inBuf2)))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||
&vectorCount, NULL, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||
|
||||
if (gSkipCorrectnessTesting) return CL_SUCCESS;
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
|
||||
t = (cl_ushort *)r;
|
||||
s.resize(buffer_elements);
|
||||
s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
s[j] = cl_half_to_float(p[j]);
|
||||
r[j] = HFF(func.f_fi(s[j], s2[j]));
|
||||
}
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is
|
||||
// an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
|
||||
buffer_elements * sizeof(cl_ushort), 0, NULL, NULL, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the last buffer
|
||||
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0,
|
||||
buffer_elements * sizeof(cl_ushort), 0, NULL, NULL, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Verify data
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_ushort *q = out[k];
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (t[j] != q[j])
|
||||
{
|
||||
float test = cl_half_to_float(q[j]);
|
||||
double correct = func.f_fi(s[j], s2[j]);
|
||||
float err = Ulp_Error_Half(q[j], correct);
|
||||
int fail = !(fabsf(err) <= ulps);
|
||||
|
||||
if (fail && ftz)
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
if (IsHalfResultSubnormal(correct, ulps))
|
||||
{
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// retry per section 6.5.3.3
|
||||
if (IsHalfSubnormal(p[j]))
|
||||
{
|
||||
double correct2, correct3;
|
||||
float err2, err3;
|
||||
correct2 = func.f_fi(0.0, s2[j]);
|
||||
correct3 = func.f_fi(-0.0, s2[j]);
|
||||
err2 = Ulp_Error_Half(q[j], correct2);
|
||||
err3 = Ulp_Error_Half(q[j], correct3);
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= ulps))
|
||||
&& (!(fabsf(err3) <= ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct2, ulps)
|
||||
|| IsHalfResultSubnormal(correct3, ulps))
|
||||
{
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fabsf(err) > tinfo->maxError)
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if (fail)
|
||||
{
|
||||
vlog_error("\nERROR: %s%s: %f ulp error at {%a (0x%04x), "
|
||||
"%d}\nExpected: %a (half 0x%04x) \nActual: %a "
|
||||
"(half 0x%04x) at index: %d\n",
|
||||
name, sizeNames[k], err, s[j], p[j], s2[j],
|
||||
cl_half_to_float(r[j]), r[j], test, q[j],
|
||||
(cl_uint)j);
|
||||
error = -1;
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||
j, error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||
|
||||
if (0 == (base & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
|
||||
"ThreadCount:%2u\n",
|
||||
base, job->step, job->scale, buffer_elements, job->ulps,
|
||||
job->threadCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Half_Half_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfoBase test_info_base;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
cl_int maxErrorVal2 = 0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info_base, 0, sizeof(test_info_base));
|
||||
TestInfo test_info(test_info_base);
|
||||
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_int) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_half));
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->half_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
|
||||
test_info.tinfo.resize(test_info.threadCount);
|
||||
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = { i * test_info.subBufferSize
|
||||
* sizeof(cl_half),
|
||||
test_info.subBufferSize * sizeof(cl_half) };
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
cl_buffer_region region2 = { i * test_info.subBufferSize
|
||||
* sizeof(cl_int),
|
||||
test_info.subBufferSize * sizeof(cl_int) };
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
|
||||
"for region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
|
||||
}
|
||||
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode };
|
||||
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info);
|
||||
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
@@ -192,8 +192,7 @@ const double specialValues[] = {
|
||||
+0.0,
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
constexpr size_t specialValuesCount = ARRAY_SIZE(specialValues);
|
||||
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
|
||||
@@ -184,8 +184,7 @@ const float specialValues[] = {
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
constexpr size_t specialValuesCount = ARRAY_SIZE(specialValues);
|
||||
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
|
||||
680
test_conformance/math_brute_force/binary_operator_half.cpp
Normal file
680
test_conformance/math_brute_force/binary_operator_half.cpp
Normal file
@@ -0,0 +1,680 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetBinaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||
ParameterType::Half, ParameterType::Half,
|
||||
vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
struct ThreadInfo
|
||||
{
|
||||
// Input and output buffers for the thread
|
||||
clMemWrapper inBuf;
|
||||
clMemWrapper inBuf2;
|
||||
Buffers outBuf;
|
||||
|
||||
// max error value. Init to 0.
|
||||
float maxError;
|
||||
// position of the max error value (param 1). Init to 0.
|
||||
double maxErrorValue;
|
||||
// position of the max error value (param 2). Init to 0.
|
||||
double maxErrorValue2;
|
||||
MTdataHolder d;
|
||||
|
||||
// Per thread command queue to improve performance
|
||||
clCommandQueueWrapper tQueue;
|
||||
};
|
||||
|
||||
struct TestInfo
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
|
||||
// Programs for various vector sizes.
|
||||
Programs programs;
|
||||
|
||||
// Thread-specific kernels for each vector size:
|
||||
// k[vector_size][thread_id]
|
||||
KernelMatrix k;
|
||||
|
||||
// Array of thread specific information
|
||||
std::vector<ThreadInfo> tinfo;
|
||||
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
float ulps; // max_allowed ulps
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
|
||||
// no special fields
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
const cl_half specialValuesHalf[] = {
|
||||
0xffff, 0x0000, 0x0001, 0x7c00, /*INFINITY*/
|
||||
0xfc00, /*-INFINITY*/
|
||||
0x8000, /*-0*/
|
||||
0x7bff, /*HALF_MAX*/
|
||||
0x0400, /*HALF_MIN*/
|
||||
0x03ff, /* Largest denormal */
|
||||
0x3c00, /* 1 */
|
||||
0xbc00, /* -1 */
|
||||
0x3555, /*nearest value to 1/3*/
|
||||
0x3bff, /*largest number less than one*/
|
||||
0xc000, /* -2 */
|
||||
0xfbff, /* -HALF_MAX */
|
||||
0x8400, /* -HALF_MIN */
|
||||
0x4248, /* M_PI_H */
|
||||
0xc248, /* -M_PI_H */
|
||||
0xbbff, /* Largest negative fraction */
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
|
||||
|
||||
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
TestInfo *job = (TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
size_t buffer_size = buffer_elements * sizeof(cl_half);
|
||||
cl_uint base = job_id * (cl_uint)job->step;
|
||||
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||
float ulps = job->ulps;
|
||||
fptr func = job->f->func;
|
||||
int ftz = job->ftz;
|
||||
MTdata d = tinfo->d;
|
||||
cl_int error;
|
||||
|
||||
const char *name = job->f->name;
|
||||
cl_half *r = 0;
|
||||
std::vector<float> s(0), s2(0);
|
||||
RoundingMode oldRoundMode;
|
||||
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_half *out[VECTOR_SIZE_COUNT];
|
||||
|
||||
if (gHostFill)
|
||||
{
|
||||
// start the map of the output arrays
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
buffer_size, 0, NULL, e + j, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||
}
|
||||
|
||||
bool divide = strcmp(name, "divide") == 0;
|
||||
|
||||
// Init input array
|
||||
cl_half *p = (cl_half *)gIn + thread_id * buffer_elements;
|
||||
cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements;
|
||||
cl_uint idx = 0;
|
||||
int totalSpecialValueCount =
|
||||
specialValuesHalfCount * specialValuesHalfCount;
|
||||
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
{
|
||||
// Insert special values
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesHalfCount;
|
||||
y = (job_id * buffer_elements) / specialValuesHalfCount;
|
||||
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[idx] = specialValuesHalf[x];
|
||||
p2[idx] = specialValuesHalf[y];
|
||||
if (++x >= specialValuesHalfCount)
|
||||
{
|
||||
x = 0;
|
||||
y++;
|
||||
if (y >= specialValuesHalfCount) break;
|
||||
}
|
||||
|
||||
if (divide)
|
||||
{
|
||||
cl_half pj = p[idx] & 0x7fff;
|
||||
cl_half p2j = p2[idx] & 0x7fff;
|
||||
// Replace values outside [2^-7, 2^7] with QNaN
|
||||
if (pj < 0x2000 || pj > 0x5800) p[idx] = 0x7e00; // HALF_NAN
|
||||
if (p2j < 0x2000 || p2j > 0x5800) p2[idx] = 0x7e00;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Init any remaining values
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
p[idx] = (cl_half)genrand_int32(d);
|
||||
p2[idx] = (cl_half)genrand_int32(d);
|
||||
|
||||
if (divide)
|
||||
{
|
||||
cl_half pj = p[idx] & 0x7fff;
|
||||
cl_half p2j = p2[idx] & 0x7fff;
|
||||
// Replace values outside [2^-7, 2^7] with QNaN
|
||||
if (pj < 0x2000 || pj > 0x5800) p[idx] = 0x7e00; // HALF_NAN
|
||||
if (p2j < 0x2000 || p2j > 0x5800) p2[idx] = 0x7e00;
|
||||
}
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
buffer_size, p, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
|
||||
buffer_size, p2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if (gHostFill)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
{
|
||||
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clReleaseEvent(e[j])))
|
||||
{
|
||||
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry
|
||||
// over
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||
&pattern, sizeof(pattern), 0,
|
||||
buffer_size, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||
}
|
||||
|
||||
// Run the kernel
|
||||
size_t vectorCount =
|
||||
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||
// own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||
&tinfo->outBuf[j])))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||
&tinfo->inBuf)))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
|
||||
&tinfo->inBuf2)))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||
&vectorCount, NULL, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||
|
||||
if (gSkipCorrectnessTesting)
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
FPU_mode_type oldMode;
|
||||
memset(&oldMode, 0, sizeof(oldMode));
|
||||
if (ftz) ForceFTZ(&oldMode);
|
||||
|
||||
// Set the rounding mode to match the device
|
||||
oldRoundMode = kRoundToNearestEven;
|
||||
if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
|
||||
s.resize(buffer_elements);
|
||||
s2.resize(buffer_elements);
|
||||
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
s[j] = HTF(p[j]);
|
||||
s2[j] = HTF(p2[j]);
|
||||
r[j] = HFF(func.f_ff(s[j], s2[j]));
|
||||
}
|
||||
|
||||
if (ftz) RestoreFPState(&oldMode);
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||
// for the last buffer. This is an in order queue.
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
|
||||
buffer_size, 0, NULL, NULL, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify data
|
||||
|
||||
for (size_t j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_half *q = out[k];
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (r[j] != q[j])
|
||||
{
|
||||
float test = HTF(q[j]);
|
||||
float correct = func.f_ff(s[j], s2[j]);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or
|
||||
// output is a infinity or NaN or overflow
|
||||
if (!gInfNanSupport)
|
||||
{
|
||||
// Note: no double rounding here. Reference functions
|
||||
// calculate in single precision.
|
||||
if (IsFloatInfinity(correct) || IsFloatNaN(correct)
|
||||
|| IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j])
|
||||
|| IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
|
||||
continue;
|
||||
}
|
||||
|
||||
float err = Ulp_Error_Half(q[j], correct);
|
||||
|
||||
int fail = !(fabsf(err) <= ulps);
|
||||
|
||||
if (fail && ftz)
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
if (IsHalfResultSubnormal(correct, ulps))
|
||||
{
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// retry per section 6.5.3.3
|
||||
if (IsHalfSubnormal(p[j]))
|
||||
{
|
||||
double correct2, correct3;
|
||||
float err2, err3;
|
||||
|
||||
correct2 = HTF(func.f_ff(0.0, s2[j]));
|
||||
correct3 = HTF(func.f_ff(-0.0, s2[j]));
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an
|
||||
// input or output is a infinity or NaN or overflow
|
||||
if (!gInfNanSupport)
|
||||
{
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsFloatInfinity(correct2)
|
||||
|| IsFloatNaN(correct2)
|
||||
|| IsFloatInfinity(correct3)
|
||||
|| IsFloatNaN(correct3))
|
||||
continue;
|
||||
}
|
||||
|
||||
err2 = Ulp_Error_Half(q[j], correct2);
|
||||
err3 = Ulp_Error_Half(q[j], correct3);
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= ulps))
|
||||
&& (!(fabsf(err3) <= ulps)));
|
||||
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct2, ulps)
|
||||
|| IsHalfResultSubnormal(correct3, ulps))
|
||||
{
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// try with both args as zero
|
||||
if (IsHalfSubnormal(p2[j]))
|
||||
{
|
||||
double correct4, correct5;
|
||||
float err4, err5;
|
||||
|
||||
correct2 = HTF(func.f_ff(0.0, 0.0));
|
||||
correct3 = HTF(func.f_ff(-0.0, 0.0));
|
||||
correct4 = HTF(func.f_ff(0.0, -0.0));
|
||||
correct5 = HTF(func.f_ff(-0.0, -0.0));
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if
|
||||
// an input or output is a infinity or NaN or
|
||||
// overflow
|
||||
if (!gInfNanSupport)
|
||||
{
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsFloatInfinity(correct2)
|
||||
|| IsFloatNaN(correct2)
|
||||
|| IsFloatInfinity(correct3)
|
||||
|| IsFloatNaN(correct3)
|
||||
|| IsFloatInfinity(correct4)
|
||||
|| IsFloatNaN(correct4)
|
||||
|| IsFloatInfinity(correct5)
|
||||
|| IsFloatNaN(correct5))
|
||||
continue;
|
||||
}
|
||||
|
||||
err2 = Ulp_Error_Half(q[j], correct2);
|
||||
err3 = Ulp_Error_Half(q[j], correct3);
|
||||
err4 = Ulp_Error_Half(q[j], correct4);
|
||||
err5 = Ulp_Error_Half(q[j], correct5);
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= ulps))
|
||||
&& (!(fabsf(err3) <= ulps))
|
||||
&& (!(fabsf(err4) <= ulps))
|
||||
&& (!(fabsf(err5) <= ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct2, ulps)
|
||||
|| IsHalfResultSubnormal(correct3, ulps)
|
||||
|| IsHalfResultSubnormal(correct4, ulps)
|
||||
|| IsHalfResultSubnormal(correct5, ulps))
|
||||
{
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (IsHalfSubnormal(p2[j]))
|
||||
{
|
||||
double correct2, correct3;
|
||||
float err2, err3;
|
||||
|
||||
correct2 = HTF(func.f_ff(s[j], 0.0));
|
||||
correct3 = HTF(func.f_ff(s[j], -0.0));
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an
|
||||
// input or output is a infinity or NaN or overflow
|
||||
if (!gInfNanSupport)
|
||||
{
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsFloatInfinity(correct) || IsFloatNaN(correct)
|
||||
|| IsFloatInfinity(correct2)
|
||||
|| IsFloatNaN(correct2))
|
||||
continue;
|
||||
}
|
||||
|
||||
err2 = Ulp_Error_Half(q[j], correct2);
|
||||
err3 = Ulp_Error_Half(q[j], correct3);
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= ulps))
|
||||
&& (!(fabsf(err3) <= ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct2, ulps)
|
||||
|| IsHalfResultSubnormal(correct3, ulps))
|
||||
{
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fabsf(err) > tinfo->maxError)
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if (fail)
|
||||
{
|
||||
vlog_error("\nERROR: %s%s: %f ulp error at {%a (0x%04x), "
|
||||
"%a (0x%04x)}\nExpected: %a (half 0x%04x) "
|
||||
"\nActual: %a (half 0x%04x) at index: %zu\n",
|
||||
name, sizeNames[k], err, s[j], p[j], s2[j],
|
||||
p2[j], HTF(r[j]), r[j], test, q[j], j);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||
j, error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||
|
||||
|
||||
if (0 == (base & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
|
||||
"ThreadCount:%2u\n",
|
||||
base, job->step, job->scale, buffer_elements, job->ulps,
|
||||
job->threadCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Half_Half_Half_Operator(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfo test_info{};
|
||||
cl_int error;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
double maxErrorVal2 = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_half));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->half_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
|
||||
test_info.tinfo.resize(test_info.threadCount);
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = { i * test_info.subBufferSize
|
||||
* sizeof(cl_half),
|
||||
test_info.subBufferSize * sizeof(cl_half) };
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of "
|
||||
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||
(int)j, region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode };
|
||||
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info);
|
||||
|
||||
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||
}
|
||||
// Run the kernels
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
477
test_conformance/math_brute_force/binary_two_results_i_half.cpp
Normal file
477
test_conformance/math_brute_force/binary_two_results_i_half.cpp
Normal file
@@ -0,0 +1,477 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <cinttypes>
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetBinaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||
ParameterType::Int, ParameterType::Half,
|
||||
ParameterType::Half, vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
struct ComputeReferenceInfoF
|
||||
{
|
||||
const cl_half *x;
|
||||
const cl_half *y;
|
||||
cl_half *r;
|
||||
int32_t *i;
|
||||
double (*f_ffpI)(double, double, int *);
|
||||
cl_uint lim;
|
||||
cl_uint count;
|
||||
};
|
||||
|
||||
cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
|
||||
{
|
||||
ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
|
||||
cl_uint lim = cri->lim;
|
||||
cl_uint count = cri->count;
|
||||
cl_uint off = jid * count;
|
||||
const cl_half *x = cri->x + off;
|
||||
const cl_half *y = cri->y + off;
|
||||
cl_half *r = cri->r + off;
|
||||
int32_t *i = cri->i + off;
|
||||
double (*f)(double, double, int *) = cri->f_ffpI;
|
||||
|
||||
if (off + count > lim) count = lim - off;
|
||||
|
||||
for (cl_uint j = 0; j < count; ++j)
|
||||
r[j] = HFF((float)f((double)HTF(x[j]), (double)HTF(y[j]), i + j));
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
|
||||
Programs programs;
|
||||
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||
KernelMatrix kernels;
|
||||
float maxError = 0.0f;
|
||||
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
int64_t maxError2 = 0;
|
||||
float maxErrorVal = 0.0f;
|
||||
float maxErrorVal2 = 0.0f;
|
||||
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||
|
||||
// use larger type of output data to prevent overflowing buffer size
|
||||
constexpr size_t buffer_size = BUFFER_SIZE / sizeof(int32_t);
|
||||
|
||||
cl_uint threadCount = GetThreadCount();
|
||||
|
||||
float half_ulps = f->half_ulps;
|
||||
|
||||
int testingRemquo = !strcmp(f->name, "remquo");
|
||||
|
||||
// Init the kernels
|
||||
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
|
||||
if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
return error;
|
||||
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_half *p = (cl_half *)gIn;
|
||||
cl_half *p2 = (cl_half *)gIn2;
|
||||
for (size_t j = 0; j < buffer_size; j++)
|
||||
{
|
||||
p[j] = (cl_half)genrand_int32(d);
|
||||
p2[j] = (cl_half)genrand_int32(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
buffer_size * sizeof(cl_half), gIn, 0,
|
||||
NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
buffer_size * sizeof(cl_half), gIn2,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Write garbage into output arrays
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||
CL_FALSE, 0, BUFFER_SIZE,
|
||||
gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||
error, j);
|
||||
return error;
|
||||
}
|
||||
|
||||
memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
|
||||
CL_FALSE, 0, BUFFER_SIZE,
|
||||
gOut2[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
|
||||
error, j);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||
sizeof(pattern), 0, BUFFER_SIZE, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer 1 failed!\n");
|
||||
|
||||
error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j], &pattern,
|
||||
sizeof(pattern), 0, BUFFER_SIZE, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer 2 failed!\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// align working group size with the bigger output type
|
||||
size_t vectorSize = sizeValues[j] * sizeof(int32_t);
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error =
|
||||
clSetKernelArg(kernels[j][thread_id], 1,
|
||||
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
|
||||
sizeof(gInBuffer), &gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||
1, NULL, &localCount, NULL, 0,
|
||||
NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
||||
|
||||
if (threadCount > 1)
|
||||
{
|
||||
ComputeReferenceInfoF cri;
|
||||
cri.x = p;
|
||||
cri.y = p2;
|
||||
cri.r = (cl_half *)gOut_Ref;
|
||||
cri.i = (int32_t *)gOut_Ref2;
|
||||
cri.f_ffpI = f->func.f_ffpI;
|
||||
cri.lim = buffer_size;
|
||||
cri.count = (cri.lim + threadCount - 1) / threadCount;
|
||||
ThreadPool_Do(ReferenceF, threadCount, &cri);
|
||||
}
|
||||
else
|
||||
{
|
||||
cl_half *r = (cl_half *)gOut_Ref;
|
||||
int32_t *r2 = (int32_t *)gOut_Ref2;
|
||||
for (size_t j = 0; j < buffer_size; j++)
|
||||
r[j] =
|
||||
HFF((float)f->func.f_ffpI(HTF(p[j]), HTF(p2[j]), r2 + j));
|
||||
}
|
||||
|
||||
// Read the data back
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking =
|
||||
(j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], blocking, 0,
|
||||
BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("ReadArray failed %d\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer2[j], blocking, 0,
|
||||
BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("ReadArray2 failed %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if (gSkipCorrectnessTesting) break;
|
||||
|
||||
// Verify data
|
||||
cl_half *t = (cl_half *)gOut_Ref;
|
||||
int32_t *t2 = (int32_t *)gOut_Ref2;
|
||||
for (size_t j = 0; j < buffer_size; j++)
|
||||
{
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_half *q = (cl_half *)(gOut[k]);
|
||||
int32_t *q2 = (int32_t *)gOut2[k];
|
||||
|
||||
// Check for exact match to correctly rounded result
|
||||
if (t[j] == q[j] && t2[j] == q2[j]) continue;
|
||||
|
||||
// Check for paired NaNs
|
||||
if (IsHalfNaN(t[j]) && IsHalfNaN(q[j]) && t2[j] == q2[j])
|
||||
continue;
|
||||
|
||||
cl_half test = ((cl_half *)q)[j];
|
||||
int correct2 = INT_MIN;
|
||||
float correct =
|
||||
(float)f->func.f_ffpI(HTF(p[j]), HTF(p2[j]), &correct2);
|
||||
float err = Ulp_Error_Half(test, correct);
|
||||
int64_t iErr;
|
||||
|
||||
// in case of remquo, we only care about the sign and last
|
||||
// seven bits of integer as per the spec.
|
||||
if (testingRemquo)
|
||||
iErr = (long long)(q2[j] & 0x0000007f)
|
||||
- (long long)(correct2 & 0x0000007f);
|
||||
else
|
||||
iErr = (long long)q2[j] - (long long)correct2;
|
||||
|
||||
// For remquo, if y = 0, x is infinite, or either is NaN
|
||||
// then the standard either neglects to say what is returned
|
||||
// in iptr or leaves it undefined or implementation defined.
|
||||
int iptrUndefined = IsHalfInfinity(p[j]) || (HTF(p2[j]) == 0.0f)
|
||||
|| IsHalfNaN(p2[j]) || IsHalfNaN(p[j]);
|
||||
if (iptrUndefined) iErr = 0;
|
||||
|
||||
int fail = !(fabsf(err) <= half_ulps && iErr == 0);
|
||||
if (ftz && fail)
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
if (IsHalfResultSubnormal(correct, half_ulps))
|
||||
{
|
||||
fail = fail && !(test == 0.0f && iErr == 0);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// retry per section 6.5.3.3
|
||||
if (IsHalfSubnormal(p[j]))
|
||||
{
|
||||
int correct3i, correct4i;
|
||||
float correct3 =
|
||||
(float)f->func.f_ffpI(0.0, HTF(p2[j]), &correct3i);
|
||||
float correct4 =
|
||||
(float)f->func.f_ffpI(-0.0, HTF(p2[j]), &correct4i);
|
||||
float err2 = Ulp_Error_Half(test, correct3);
|
||||
float err3 = Ulp_Error_Half(test, correct4);
|
||||
int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
|
||||
int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= half_ulps && iErr3 == 0))
|
||||
&& (!(fabsf(err3) <= half_ulps && iErr4 == 0)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
|
||||
if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct2, half_ulps)
|
||||
|| IsHalfResultSubnormal(correct3, half_ulps))
|
||||
{
|
||||
fail = fail
|
||||
&& !(test == 0.0f
|
||||
&& (iErr3 == 0 || iErr4 == 0));
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// try with both args as zero
|
||||
if (IsHalfSubnormal(p2[j]))
|
||||
{
|
||||
int correct7i, correct8i;
|
||||
correct3 = f->func.f_ffpI(0.0, 0.0, &correct3i);
|
||||
correct4 = f->func.f_ffpI(-0.0, 0.0, &correct4i);
|
||||
double correct7 =
|
||||
f->func.f_ffpI(0.0, -0.0, &correct7i);
|
||||
double correct8 =
|
||||
f->func.f_ffpI(-0.0, -0.0, &correct8i);
|
||||
err2 = Ulp_Error_Half(test, correct3);
|
||||
err3 = Ulp_Error_Half(test, correct4);
|
||||
float err4 = Ulp_Error_Half(test, correct7);
|
||||
float err5 = Ulp_Error_Half(test, correct8);
|
||||
iErr3 = (long long)q2[j] - (long long)correct3i;
|
||||
iErr4 = (long long)q2[j] - (long long)correct4i;
|
||||
int64_t iErr7 =
|
||||
(long long)q2[j] - (long long)correct7i;
|
||||
int64_t iErr8 =
|
||||
(long long)q2[j] - (long long)correct8i;
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= half_ulps && iErr3 == 0))
|
||||
&& (!(fabsf(err3) <= half_ulps
|
||||
&& iErr4 == 0))
|
||||
&& (!(fabsf(err4) <= half_ulps
|
||||
&& iErr7 == 0))
|
||||
&& (!(fabsf(err5) <= half_ulps
|
||||
&& iErr8 == 0)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||
if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
|
||||
if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
|
||||
if (llabs(iErr7) < llabs(iErr)) iErr = iErr7;
|
||||
if (llabs(iErr8) < llabs(iErr)) iErr = iErr8;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct3, half_ulps)
|
||||
|| IsHalfResultSubnormal(correct4, half_ulps)
|
||||
|| IsHalfResultSubnormal(correct7, half_ulps)
|
||||
|| IsHalfResultSubnormal(correct8, half_ulps))
|
||||
{
|
||||
fail = fail
|
||||
&& !(test == 0.0f
|
||||
&& (iErr3 == 0 || iErr4 == 0
|
||||
|| iErr7 == 0 || iErr8 == 0));
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (IsHalfSubnormal(p2[j]))
|
||||
{
|
||||
int correct3i, correct4i;
|
||||
double correct3 =
|
||||
f->func.f_ffpI(HTF(p[j]), 0.0, &correct3i);
|
||||
double correct4 =
|
||||
f->func.f_ffpI(HTF(p[j]), -0.0, &correct4i);
|
||||
float err2 = Ulp_Error_Half(test, correct3);
|
||||
float err3 = Ulp_Error_Half(test, correct4);
|
||||
int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
|
||||
int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= half_ulps && iErr3 == 0))
|
||||
&& (!(fabsf(err3) <= half_ulps && iErr4 == 0)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
|
||||
if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct2, half_ulps)
|
||||
|| IsHalfResultSubnormal(correct3, half_ulps))
|
||||
{
|
||||
fail = fail
|
||||
&& !(test == 0.0f
|
||||
&& (iErr3 == 0 || iErr4 == 0));
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fabsf(err) > maxError)
|
||||
{
|
||||
maxError = fabsf(err);
|
||||
maxErrorVal = HTF(p[j]);
|
||||
}
|
||||
if (llabs(iErr) > maxError2)
|
||||
{
|
||||
maxError2 = llabs(iErr);
|
||||
maxErrorVal2 = HTF(p[j]);
|
||||
}
|
||||
|
||||
if (fail)
|
||||
{
|
||||
vlog_error("\nERROR: %s%s: {%f, %" PRId64
|
||||
"} ulp error at {%a, %a} "
|
||||
"({0x%04x, 0x%04x}): *{%a, %d} ({0x%04x, "
|
||||
"0x%8.8x}) vs. {%a, %d} ({0x%04x, 0x%8.8x})\n",
|
||||
f->name, sizeNames[k], err, iErr, HTF(p[j]),
|
||||
HTF(p2[j]), p[j], p2[j], HTF(t[j]), t2[j], t[j],
|
||||
t2[j], HTF(test), q2[j], test, q2[j]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (0 == (i & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14" PRIu64 " step:%10" PRIu64
|
||||
" bufferSize:%10d \n",
|
||||
i, step, BUFFER_SIZE);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
|
||||
maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
@@ -27,8 +27,11 @@ const char *GetTypeName(ParameterType type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case ParameterType::Half: return "half";
|
||||
case ParameterType::Float: return "float";
|
||||
case ParameterType::Double: return "double";
|
||||
case ParameterType::Short: return "short";
|
||||
case ParameterType::UShort: return "ushort";
|
||||
case ParameterType::Int: return "int";
|
||||
case ParameterType::UInt: return "uint";
|
||||
case ParameterType::Long: return "long";
|
||||
@@ -41,9 +44,13 @@ const char *GetUndefValue(ParameterType type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case ParameterType::Half:
|
||||
case ParameterType::Float:
|
||||
case ParameterType::Double: return "NAN";
|
||||
|
||||
case ParameterType::Short:
|
||||
case ParameterType::UShort: return "0x5678";
|
||||
|
||||
case ParameterType::Int:
|
||||
case ParameterType::UInt: return "0x12345678";
|
||||
|
||||
@@ -71,14 +78,17 @@ void EmitEnableExtension(std::ostringstream &kernel,
|
||||
const std::initializer_list<ParameterType> &types)
|
||||
{
|
||||
bool needsFp64 = false;
|
||||
bool needsFp16 = false;
|
||||
|
||||
for (const auto &type : types)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case ParameterType::Double: needsFp64 = true; break;
|
||||
|
||||
case ParameterType::Half: needsFp16 = true; break;
|
||||
case ParameterType::Float:
|
||||
case ParameterType::Short:
|
||||
case ParameterType::UShort:
|
||||
case ParameterType::Int:
|
||||
case ParameterType::UInt:
|
||||
case ParameterType::Long:
|
||||
@@ -89,6 +99,7 @@ void EmitEnableExtension(std::ostringstream &kernel,
|
||||
}
|
||||
|
||||
if (needsFp64) kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||
if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
|
||||
}
|
||||
|
||||
std::string GetBuildOptions(bool relaxed_mode)
|
||||
|
||||
@@ -36,8 +36,11 @@ using Buffers = std::array<clMemWrapper, VECTOR_SIZE_COUNT>;
|
||||
// Types supported for kernel code generation.
|
||||
enum class ParameterType
|
||||
{
|
||||
Half,
|
||||
Float,
|
||||
Double,
|
||||
Short,
|
||||
UShort,
|
||||
Int,
|
||||
UInt,
|
||||
Long,
|
||||
@@ -91,4 +94,5 @@ using SourceGenerator = std::string (*)(const std::string &kernel_name,
|
||||
cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
|
||||
SourceGenerator generator);
|
||||
|
||||
|
||||
#endif /* COMMON_H */
|
||||
|
||||
@@ -29,36 +29,41 @@
|
||||
// Only use ulps information in spir test
|
||||
#ifdef FUNCTION_LIST_ULPS_ONLY
|
||||
|
||||
#define ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type) \
|
||||
#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type) \
|
||||
{ \
|
||||
STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL }, \
|
||||
_ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, \
|
||||
_ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, \
|
||||
RELAXED_OFF, _type \
|
||||
}
|
||||
#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type, \
|
||||
_relaxed_embedded_ulp) \
|
||||
#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
|
||||
_type, _relaxed_embedded_ulp) \
|
||||
{ \
|
||||
STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL }, \
|
||||
_ulp, _ulp, _embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp, \
|
||||
_rmode, RELAXED_ON, _type \
|
||||
_ulp, _ulp, _half_ulp, _embedded_ulp, _relaxed_ulp, \
|
||||
_relaxed_embedded_ulp, _rmode, RELAXED_ON, _type \
|
||||
}
|
||||
#define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type) \
|
||||
{ \
|
||||
"half_" STRINGIFY(_name), "half_" STRINGIFY(_name), { NULL }, \
|
||||
{ NULL }, { NULL }, _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, \
|
||||
_rmode, RELAXED_OFF, _type \
|
||||
{ NULL }, { NULL }, _ulp, _ulp, _ulp, _embedded_ulp, INFINITY, \
|
||||
INFINITY, _rmode, RELAXED_OFF, _type \
|
||||
}
|
||||
#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type) \
|
||||
#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp, \
|
||||
_rmode, _type) \
|
||||
{ \
|
||||
STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
|
||||
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
||||
_half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, \
|
||||
_type \
|
||||
}
|
||||
|
||||
#define unaryF NULL
|
||||
#define unaryOF NULL
|
||||
#define i_unaryF NULL
|
||||
#define unaryF_u NULL
|
||||
#define macro_unaryF NULL
|
||||
#define binaryF NULL
|
||||
#define binaryOF NULL
|
||||
#define binaryF_nextafter NULL
|
||||
#define binaryOperatorF NULL
|
||||
#define binaryF_i NULL
|
||||
#define macro_binaryF NULL
|
||||
@@ -76,31 +81,34 @@
|
||||
|
||||
#else // FUNCTION_LIST_ULPS_ONLY
|
||||
|
||||
#define ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type) \
|
||||
#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type) \
|
||||
{ \
|
||||
STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name }, \
|
||||
{ (void*)reference_##_name##l }, { (void*)reference_##_name }, \
|
||||
_ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, \
|
||||
_ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, \
|
||||
RELAXED_OFF, _type \
|
||||
}
|
||||
#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type, \
|
||||
_relaxed_embedded_ulp) \
|
||||
#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
|
||||
_type, _relaxed_embedded_ulp) \
|
||||
{ \
|
||||
STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name }, \
|
||||
{ (void*)reference_##_name##l }, \
|
||||
{ (void*)reference_##relaxed_##_name }, _ulp, _ulp, _embedded_ulp, \
|
||||
_relaxed_ulp, _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type \
|
||||
{ (void*)reference_##relaxed_##_name }, _ulp, _ulp, _half_ulp, \
|
||||
_embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp, _rmode, \
|
||||
RELAXED_ON, _type \
|
||||
}
|
||||
#define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type) \
|
||||
{ \
|
||||
"half_" STRINGIFY(_name), "half_" STRINGIFY(_name), \
|
||||
{ (void*)reference_##_name }, { NULL }, { NULL }, _ulp, _ulp, \
|
||||
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
||||
_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, \
|
||||
_type \
|
||||
}
|
||||
#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type) \
|
||||
#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp, \
|
||||
_rmode, _type) \
|
||||
{ \
|
||||
STRINGIFY(_name), _operator, { (void*)reference_##_name }, \
|
||||
{ (void*)reference_##_name##l }, { NULL }, _ulp, _ulp, \
|
||||
{ (void*)reference_##_name##l }, { NULL }, _ulp, _ulp, _half_ulp, \
|
||||
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
||||
}
|
||||
|
||||
@@ -108,85 +116,114 @@ static constexpr vtbl _unary = {
|
||||
"unary",
|
||||
TestFunc_Float_Float,
|
||||
TestFunc_Double_Double,
|
||||
TestFunc_Half_Half,
|
||||
};
|
||||
|
||||
static constexpr vtbl _unaryof = { "unaryof", TestFunc_Float_Float, NULL,
|
||||
NULL };
|
||||
|
||||
static constexpr vtbl _i_unary = {
|
||||
"i_unary",
|
||||
TestFunc_Int_Float,
|
||||
TestFunc_Int_Double,
|
||||
TestFunc_Int_Half,
|
||||
};
|
||||
|
||||
static constexpr vtbl _unary_u = {
|
||||
"unary_u",
|
||||
TestFunc_Float_UInt,
|
||||
TestFunc_Double_ULong,
|
||||
TestFunc_Half_UShort,
|
||||
};
|
||||
|
||||
static constexpr vtbl _macro_unary = {
|
||||
"macro_unary",
|
||||
TestMacro_Int_Float,
|
||||
TestMacro_Int_Double,
|
||||
TestMacro_Int_Half,
|
||||
};
|
||||
|
||||
static constexpr vtbl _binary = {
|
||||
"binary",
|
||||
TestFunc_Float_Float_Float,
|
||||
TestFunc_Double_Double_Double,
|
||||
TestFunc_Half_Half_Half,
|
||||
};
|
||||
|
||||
static constexpr vtbl _binary_nextafter = {
|
||||
"binary",
|
||||
TestFunc_Float_Float_Float,
|
||||
TestFunc_Double_Double_Double,
|
||||
TestFunc_Half_Half_Half_nextafter,
|
||||
};
|
||||
|
||||
static constexpr vtbl _binaryof = { "binaryof", TestFunc_Float_Float_Float,
|
||||
NULL, NULL };
|
||||
|
||||
static constexpr vtbl _binary_operator = {
|
||||
"binaryOperator",
|
||||
TestFunc_Float_Float_Float_Operator,
|
||||
TestFunc_Double_Double_Double_Operator,
|
||||
TestFunc_Half_Half_Half_Operator,
|
||||
};
|
||||
|
||||
static constexpr vtbl _binary_i = {
|
||||
"binary_i",
|
||||
TestFunc_Float_Float_Int,
|
||||
TestFunc_Double_Double_Int,
|
||||
TestFunc_Half_Half_Int,
|
||||
};
|
||||
|
||||
static constexpr vtbl _macro_binary = {
|
||||
"macro_binary",
|
||||
TestMacro_Int_Float_Float,
|
||||
TestMacro_Int_Double_Double,
|
||||
TestMacro_Int_Half_Half,
|
||||
};
|
||||
|
||||
static constexpr vtbl _ternary = {
|
||||
"ternary",
|
||||
TestFunc_Float_Float_Float_Float,
|
||||
TestFunc_Double_Double_Double_Double,
|
||||
TestFunc_Half_Half_Half_Half,
|
||||
};
|
||||
|
||||
static constexpr vtbl _unary_two_results = {
|
||||
"unary_two_results",
|
||||
TestFunc_Float2_Float,
|
||||
TestFunc_Double2_Double,
|
||||
TestFunc_Half2_Half,
|
||||
};
|
||||
|
||||
static constexpr vtbl _unary_two_results_i = {
|
||||
"unary_two_results_i",
|
||||
TestFunc_FloatI_Float,
|
||||
TestFunc_DoubleI_Double,
|
||||
TestFunc_HalfI_Half,
|
||||
};
|
||||
|
||||
static constexpr vtbl _binary_two_results_i = {
|
||||
"binary_two_results_i",
|
||||
TestFunc_FloatI_Float_Float,
|
||||
TestFunc_DoubleI_Double_Double,
|
||||
TestFunc_HalfI_Half_Half,
|
||||
};
|
||||
|
||||
static constexpr vtbl _mad_tbl = {
|
||||
"ternary",
|
||||
TestFunc_mad_Float,
|
||||
TestFunc_mad_Double,
|
||||
TestFunc_mad_Half,
|
||||
};
|
||||
|
||||
#define unaryF &_unary
|
||||
#define unaryOF &_unaryof
|
||||
#define i_unaryF &_i_unary
|
||||
#define unaryF_u &_unary_u
|
||||
#define macro_unaryF &_macro_unary
|
||||
#define binaryF &_binary
|
||||
#define binaryF_nextafter &_binary_nextafter
|
||||
#define binaryOF &_binaryof
|
||||
#define binaryOperatorF &_binary_operator
|
||||
#define binaryF_i &_binary_i
|
||||
#define macro_binaryF &_macro_binary
|
||||
@@ -199,24 +236,24 @@ static constexpr vtbl _mad_tbl = {
|
||||
#endif // FUNCTION_LIST_ULPS_ONLY
|
||||
|
||||
const Func functionList[] = {
|
||||
ENTRY_EXT(acos, 4.0f, 4.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
||||
ENTRY(acosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(acospi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY_EXT(asin, 4.0f, 4.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
||||
ENTRY(asinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(asinpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY_EXT(atan, 5.0f, 5.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
||||
ENTRY(atanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(atanpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(atan2, 6.0f, 6.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(atan2pi, 6.0f, 6.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(cbrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(ceil, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(copysign, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY_EXT(cos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||
ENTRY_EXT(acos, 4.0f, 4.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
||||
ENTRY(acosh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(acospi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
|
||||
ENTRY_EXT(asin, 4.0f, 4.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
||||
ENTRY(asinh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(asinpi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
|
||||
ENTRY_EXT(atan, 5.0f, 5.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
||||
ENTRY(atanh, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(atanpi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(atan2, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(atan2pi, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(cbrt, 2.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
|
||||
ENTRY(ceil, 0.0f, 0.0f, 0.f, FTZ_OFF, unaryF),
|
||||
ENTRY(copysign, 0.0f, 0.0f, 0.f, FTZ_OFF, binaryF),
|
||||
ENTRY_EXT(cos, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||
0.00048828125f), // relaxed ulp 2^-11
|
||||
ENTRY(cosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY_EXT(cospi, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||
ENTRY(cosh, 4.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
|
||||
ENTRY_EXT(cospi, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||
0.00048828125f), // relaxed ulp 2^-11
|
||||
// ENTRY( erfc, 16.0f,
|
||||
// 16.0f, FTZ_OFF, unaryF),
|
||||
@@ -225,81 +262,84 @@ const Func functionList[] = {
|
||||
// 16.0f, 16.0f, FTZ_OFF,
|
||||
// unaryF), //disabled for 1.0 due to lack
|
||||
// of reference implementation
|
||||
ENTRY_EXT(exp, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF,
|
||||
ENTRY_EXT(exp, 3.0f, 4.0f, 2.f, 3.0f, FTZ_OFF, unaryF,
|
||||
4.0f), // relaxed error is actually overwritten in unary.c as it
|
||||
// is 3+floor(fabs(2*x))
|
||||
ENTRY_EXT(exp2, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF,
|
||||
ENTRY_EXT(exp2, 3.0f, 4.0f, 2.f, 3.0f, FTZ_OFF, unaryF,
|
||||
4.0f), // relaxed error is actually overwritten in unary.c as it
|
||||
// is 3+floor(fabs(2*x))
|
||||
ENTRY_EXT(exp10, 3.0f, 4.0f, 8192.0f, FTZ_OFF, unaryF,
|
||||
ENTRY_EXT(exp10, 3.0f, 4.0f, 2.f, 8192.0f, FTZ_OFF, unaryF,
|
||||
8192.0f), // relaxed error is actually overwritten in unary.c as
|
||||
// it is 3+floor(fabs(2*x)) in derived mode,
|
||||
// in non-derived mode it uses the ulp error for half_exp10.
|
||||
ENTRY(expm1, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(fabs, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(fdim, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(floor, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(fma, 0.0f, 0.0f, FTZ_OFF, ternaryF),
|
||||
ENTRY(fmax, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(fmin, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(fmod, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(fract, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
|
||||
ENTRY(frexp, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
|
||||
ENTRY(hypot, 4.0f, 4.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(ilogb, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
|
||||
ENTRY(isequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isfinite, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY(isgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isgreaterequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isinf, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY(isless, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(islessequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(islessgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isnan, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY(isnormal, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY(isnotequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isunordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(ldexp, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY(lgamma, INFINITY, INFINITY, FTZ_OFF, unaryF),
|
||||
ENTRY(lgamma_r, INFINITY, INFINITY, FTZ_OFF, unaryF_two_results_i),
|
||||
ENTRY_EXT(log, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
||||
ENTRY(expm1, 3.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
|
||||
ENTRY(fabs, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(fdim, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(floor, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(fma, 0.0f, 0.0f, 0.0f, FTZ_OFF, ternaryF),
|
||||
ENTRY(fmax, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(fmin, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(fmod, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(fract, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
|
||||
ENTRY(frexp, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
|
||||
ENTRY(hypot, 4.0f, 4.0f, 2.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(ilogb, 0.0f, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
|
||||
ENTRY(isequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isfinite, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY(isgreater, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isgreaterequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isinf, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY(isless, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(islessequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(islessgreater, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isnan, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY(isnormal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY(isnotequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isordered, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(isunordered, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY(ldexp, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY(lgamma, INFINITY, INFINITY, INFINITY, FTZ_OFF, unaryF),
|
||||
ENTRY(lgamma_r, INFINITY, INFINITY, INFINITY, FTZ_OFF,
|
||||
unaryF_two_results_i),
|
||||
ENTRY_EXT(log, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
||||
4.76837158203125e-7f), // relaxed ulp 2^-21
|
||||
ENTRY_EXT(log2, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
||||
ENTRY_EXT(log2, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
||||
4.76837158203125e-7f), // relaxed ulp 2^-21
|
||||
ENTRY_EXT(log10, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
||||
ENTRY_EXT(log10, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
||||
4.76837158203125e-7f), // relaxed ulp 2^-21
|
||||
ENTRY(log1p, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(logb, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, FTZ_OFF, mad_function,
|
||||
ENTRY(log1p, 2.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(logb, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF,
|
||||
mad_function,
|
||||
INFINITY), // in fast-relaxed-math mode it has to be either
|
||||
// exactly rounded fma or exactly rounded a*b+c
|
||||
ENTRY(maxmag, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(minmag, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(modf, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
|
||||
ENTRY(nan, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
|
||||
ENTRY(nextafter, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY_EXT(pow, 16.0f, 16.0f, 8192.0f, FTZ_OFF, binaryF,
|
||||
ENTRY(maxmag, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(minmag, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(modf, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
|
||||
ENTRY(nan, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
|
||||
ENTRY(nextafter, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_nextafter),
|
||||
ENTRY_EXT(pow, 16.0f, 16.0f, 4.0f, 8192.0f, FTZ_OFF, binaryF,
|
||||
8192.0f), // in derived mode the ulp error is calculated as
|
||||
// exp2(y*log2(x)) and in non-derived it is the same as
|
||||
// half_pow
|
||||
ENTRY(pown, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY(powr, 16.0f, 16.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY(powr, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF),
|
||||
// ENTRY( reciprocal, 1.0f,
|
||||
// 1.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(remainder, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(remquo, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
|
||||
ENTRY(rint, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(rootn, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY(round, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(rsqrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(signbit, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY_EXT(sin, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||
ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
|
||||
ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(rootn, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY(round, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(rsqrt, 2.0f, 4.0f, 1.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(signbit, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY_EXT(sin, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||
0.00048828125f), // relaxed ulp 2^-11
|
||||
ENTRY_EXT(sincos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF_two_results,
|
||||
ENTRY_EXT(sincos, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF,
|
||||
unaryF_two_results,
|
||||
0.00048828125f), // relaxed ulp 2^-11
|
||||
ENTRY(sinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY_EXT(sinpi, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||
ENTRY(sinh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
|
||||
ENTRY_EXT(sinpi, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||
0.00048828125f), // relaxed ulp 2^-11
|
||||
{ "sqrt",
|
||||
"sqrt",
|
||||
@@ -308,6 +348,7 @@ const Func functionList[] = {
|
||||
{ NULL },
|
||||
3.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
4.0f,
|
||||
INFINITY,
|
||||
INFINITY,
|
||||
@@ -322,41 +363,42 @@ const Func functionList[] = {
|
||||
0.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
INFINITY,
|
||||
INFINITY,
|
||||
FTZ_OFF,
|
||||
RELAXED_OFF,
|
||||
unaryF },
|
||||
ENTRY_EXT(
|
||||
tan, 5.0f, 5.0f, 8192.0f, FTZ_OFF, unaryF,
|
||||
tan, 5.0f, 5.0f, 2.0f, 8192.0f, FTZ_OFF, unaryF,
|
||||
8192.0f), // in derived mode it the ulp error is calculated as sin/cos
|
||||
// and in non-derived mode it is the same as half_tan.
|
||||
ENTRY(tanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(tanpi, 6.0f, 6.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(tanh, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(tanpi, 6.0f, 6.0f, 2.0f, FTZ_OFF, unaryF),
|
||||
// ENTRY( tgamma, 16.0f,
|
||||
// 16.0f, FTZ_OFF, unaryF),
|
||||
// // Commented this out until we can be
|
||||
// sure this requirement is realistic
|
||||
ENTRY(trunc, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY(trunc, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
|
||||
HALF_ENTRY(cos, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(divide, 8192.0f, 8192.0f, FTZ_ON, binaryF),
|
||||
HALF_ENTRY(exp, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(exp2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(exp10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(log, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(log2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(log10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(powr, 8192.0f, 8192.0f, FTZ_ON, binaryF),
|
||||
HALF_ENTRY(recip, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(sin, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(tan, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY(cos, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(divide, 8192.0f, 8192.0f, FTZ_ON, binaryOF),
|
||||
HALF_ENTRY(exp, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(exp2, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(exp10, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(log, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(log2, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(log10, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(powr, 8192.0f, 8192.0f, FTZ_ON, binaryOF),
|
||||
HALF_ENTRY(recip, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(sin, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
HALF_ENTRY(tan, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||
|
||||
// basic operations
|
||||
OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
{ "divide",
|
||||
"/",
|
||||
{ (void*)reference_divide },
|
||||
@@ -364,6 +406,7 @@ const Func functionList[] = {
|
||||
{ (void*)reference_relaxed_divide },
|
||||
2.5f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
3.0f,
|
||||
2.5f,
|
||||
INFINITY,
|
||||
@@ -378,15 +421,16 @@ const Func functionList[] = {
|
||||
0.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
0.f,
|
||||
INFINITY,
|
||||
FTZ_OFF,
|
||||
RELAXED_OFF,
|
||||
binaryOperatorF },
|
||||
OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, FTZ_OFF,
|
||||
OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, 0.0f, FTZ_OFF,
|
||||
unaryF), // A simple copy operation
|
||||
OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
};
|
||||
|
||||
const size_t functionListCount = sizeof(functionList) / sizeof(functionList[0]);
|
||||
|
||||
@@ -70,6 +70,9 @@ struct vtbl
|
||||
int (*DoubleTestFunc)(
|
||||
const struct Func *, MTdata,
|
||||
bool); // may be NULL if function is single precision only
|
||||
int (*HalfTestFunc)(
|
||||
const struct Func *, MTdata,
|
||||
bool); // may be NULL if function is single precision only
|
||||
};
|
||||
|
||||
struct Func
|
||||
@@ -82,6 +85,7 @@ struct Func
|
||||
fptr rfunc;
|
||||
float float_ulps;
|
||||
float double_ulps;
|
||||
float half_ulps;
|
||||
float float_embedded_ulps;
|
||||
float relaxed_error;
|
||||
float relaxed_embedded_error;
|
||||
|
||||
220
test_conformance/math_brute_force/i_unary_half.cpp
Normal file
220
test_conformance/math_brute_force/i_unary_half.cpp
Normal file
@@ -0,0 +1,220 @@
|
||||
//
|
||||
// Copyright (c) 2023 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <cinttypes>
|
||||
|
||||
namespace {
|
||||
|
||||
static cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED,
|
||||
void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
|
||||
ParameterType::Half, vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Int_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
Programs programs;
|
||||
KernelMatrix kernels;
|
||||
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||
int ftz = f->ftz || 0 == (gHalfCapabilities & CL_FP_DENORM) || gForceFTZ;
|
||||
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_int),
|
||||
size_t(1ULL << (sizeof(cl_half) * 8)));
|
||||
size_t bufferSizeIn = bufferElements * sizeof(cl_half);
|
||||
size_t bufferSizeOut = bufferElements * sizeof(cl_int);
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
// This test is not using ThreadPool so we need to disable FTZ here
|
||||
// for reference computations
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ(&oldMode);
|
||||
std::shared_ptr<int> at_scope_exit(
|
||||
nullptr, [&oldMode](int *) { RestoreFPState(&oldMode); });
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = { 1, kernels, programs, f->nameInCode };
|
||||
if ((error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
return error;
|
||||
}
|
||||
std::vector<float> s(bufferElements);
|
||||
|
||||
for (uint64_t i = 0; i < (1ULL << 16); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_ushort *p = (cl_ushort *)gIn;
|
||||
|
||||
for (size_t j = 0; j < bufferElements; j++) p[j] = (cl_ushort)i + j;
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSizeIn, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(gOut[j], &pattern, bufferSizeOut);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||
CL_FALSE, 0, bufferSizeOut,
|
||||
gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||
error, j);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||
sizeof(pattern), 0, bufferSizeOut,
|
||||
0, NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_int);
|
||||
size_t localCount = (bufferSizeOut + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
|
||||
sizeof(gInBuffer), &gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||
1, NULL, &localCount, NULL, 0,
|
||||
NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
int *r = (int *)gOut_Ref;
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
{
|
||||
s[j] = HTF(p[j]);
|
||||
r[j] = f->func.i_f(s[j]);
|
||||
}
|
||||
// Read the data back
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
bufferSizeOut, gOut[j], 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("ReadArray failed %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if (gSkipCorrectnessTesting) break;
|
||||
|
||||
// Verify data
|
||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
{
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (t[j] != q[j])
|
||||
{
|
||||
if (ftz && IsHalfSubnormal(p[j]))
|
||||
{
|
||||
unsigned int correct0 = f->func.i_f(0.0);
|
||||
unsigned int correct1 = f->func.i_f(-0.0);
|
||||
if (q[j] == correct0 || q[j] == correct1) continue;
|
||||
}
|
||||
|
||||
uint32_t err = t[j] - q[j];
|
||||
if (q[j] > t[j]) err = q[j] - t[j];
|
||||
vlog_error("\nERROR: %s%s: %d ulp error at %a (0x%04x): "
|
||||
"*%d vs. %d\n",
|
||||
f->name, sizeNames[k], err, s[j], p[j], t[j],
|
||||
q[j]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (0 == (i & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14" PRIu64 " step:%10" PRIu64
|
||||
" bufferSize:%10zd \n",
|
||||
i, step, bufferSizeOut);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
@@ -185,8 +185,7 @@ const double specialValues[] = {
|
||||
+0.0,
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
constexpr size_t specialValuesCount = ARRAY_SIZE(specialValues);
|
||||
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
|
||||
@@ -176,8 +176,7 @@ const float specialValues[] = {
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
constexpr size_t specialValuesCount = ARRAY_SIZE(specialValues);
|
||||
|
||||
cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
|
||||
540
test_conformance/math_brute_force/macro_binary_half.cpp
Normal file
540
test_conformance/math_brute_force/macro_binary_half.cpp
Normal file
@@ -0,0 +1,540 @@
|
||||
//
|
||||
// Copyright (c) 2023 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetBinaryKernel(kernel_name, builtin, ParameterType::Short,
|
||||
ParameterType::Half, ParameterType::Half,
|
||||
vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
struct ThreadInfo
|
||||
{
|
||||
clMemWrapper inBuf; // input buffer for the thread
|
||||
clMemWrapper inBuf2; // input buffer for the thread
|
||||
clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
MTdataHolder d;
|
||||
clCommandQueueWrapper
|
||||
tQueue; // per thread command queue to improve performance
|
||||
};
|
||||
|
||||
struct TestInfoBase
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
};
|
||||
|
||||
struct TestInfo : public TestInfoBase
|
||||
{
|
||||
TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
|
||||
|
||||
// Array of thread specific information
|
||||
std::vector<ThreadInfo> tinfo;
|
||||
|
||||
// Programs for various vector sizes.
|
||||
Programs programs;
|
||||
|
||||
// Thread-specific kernels for each vector size:
|
||||
// k[vector_size][thread_id]
|
||||
KernelMatrix k;
|
||||
};
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
const cl_half specialValuesHalf[] = {
|
||||
0xffff, 0x0000, 0x0001, 0x7c00, /*INFINITY*/
|
||||
0xfc00, /*-INFINITY*/
|
||||
0x8000, /*-0*/
|
||||
0x7bff, /*HALF_MAX*/
|
||||
0x0400, /*HALF_MIN*/
|
||||
0x03ff, /* Largest denormal */
|
||||
0x3c00, /* 1 */
|
||||
0xbc00, /* -1 */
|
||||
0x3555, /*nearest value to 1/3*/
|
||||
0x3bff, /*largest number less than one*/
|
||||
0xc000, /* -2 */
|
||||
0xfbff, /* -HALF_MAX */
|
||||
0x8400, /* -HALF_MIN */
|
||||
0x4248, /* M_PI_H */
|
||||
0xc248, /* -M_PI_H */
|
||||
0xbbff, /* Largest negative fraction */
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
|
||||
|
||||
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
TestInfo *job = (TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
size_t buffer_size = buffer_elements * sizeof(cl_half);
|
||||
cl_uint base = job_id * (cl_uint)job->step;
|
||||
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||
fptr func = job->f->func;
|
||||
int ftz = job->ftz;
|
||||
MTdata d = tinfo->d;
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
cl_short *t, *r;
|
||||
std::vector<float> s(0), s2(0);
|
||||
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_short *out[VECTOR_SIZE_COUNT];
|
||||
|
||||
if (gHostFill)
|
||||
{
|
||||
// start the map of the output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_short *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
buffer_size, 0, NULL, e + j, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||
}
|
||||
|
||||
// Init input array
|
||||
cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
|
||||
cl_ushort *p2 = (cl_ushort *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
int totalSpecialValueCount =
|
||||
specialValuesHalfCount * specialValuesHalfCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
{ // test edge cases
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesHalfCount;
|
||||
y = (job_id * buffer_elements) / specialValuesHalfCount;
|
||||
|
||||
for (; j < buffer_elements; j++)
|
||||
{
|
||||
p[j] = specialValuesHalf[x];
|
||||
p2[j] = specialValuesHalf[y];
|
||||
if (++x >= specialValuesHalfCount)
|
||||
{
|
||||
x = 0;
|
||||
y++;
|
||||
if (y >= specialValuesHalfCount) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Init any remaining values.
|
||||
for (; j < buffer_elements; j++)
|
||||
{
|
||||
p[j] = (cl_ushort)genrand_int32(d);
|
||||
p2[j] = (cl_ushort)genrand_int32(d);
|
||||
}
|
||||
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
buffer_size, p, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
|
||||
buffer_size, p2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if (gHostFill)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
{
|
||||
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clReleaseEvent(e[j])))
|
||||
{
|
||||
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry
|
||||
// over
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||
&pattern, sizeof(pattern), 0,
|
||||
buffer_size, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||
}
|
||||
|
||||
// run the kernel
|
||||
size_t vectorCount =
|
||||
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||
// own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||
&tinfo->outBuf[j])))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||
&tinfo->inBuf)))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
|
||||
&tinfo->inBuf2)))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||
&vectorCount, NULL, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||
|
||||
if (gSkipCorrectnessTesting) return CL_SUCCESS;
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
r = (cl_short *)gOut_Ref + thread_id * buffer_elements;
|
||||
t = (cl_short *)r;
|
||||
s.resize(buffer_elements);
|
||||
s2.resize(buffer_elements);
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
s[j] = cl_half_to_float(p[j]);
|
||||
s2[j] = cl_half_to_float(p2[j]);
|
||||
r[j] = (short)func.i_ff(s[j], s2[j]);
|
||||
}
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is
|
||||
// an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_short *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
|
||||
buffer_size, 0, NULL, NULL, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the last buffer
|
||||
out[j] = (cl_short *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||
CL_TRUE, CL_MAP_READ, 0,
|
||||
buffer_size, 0, NULL, NULL, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Verify data
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
cl_short *q = (cl_short *)out[0];
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (gMinVectorSizeIndex == 0 && t[j] != q[j])
|
||||
{
|
||||
if (ftz)
|
||||
{
|
||||
if (IsHalfSubnormal(p[j]))
|
||||
{
|
||||
if (IsHalfSubnormal(p2[j]))
|
||||
{
|
||||
short correct = (short)func.i_ff(0.0f, 0.0f);
|
||||
short correct2 = (short)func.i_ff(0.0f, -0.0f);
|
||||
short correct3 = (short)func.i_ff(-0.0f, 0.0f);
|
||||
short correct4 = (short)func.i_ff(-0.0f, -0.0f);
|
||||
|
||||
if (correct == q[j] || correct2 == q[j]
|
||||
|| correct3 == q[j] || correct4 == q[j])
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
short correct = (short)func.i_ff(0.0f, s2[j]);
|
||||
short correct2 = (short)func.i_ff(-0.0f, s2[j]);
|
||||
if (correct == q[j] || correct2 == q[j]) continue;
|
||||
}
|
||||
}
|
||||
else if (IsHalfSubnormal(p2[j]))
|
||||
{
|
||||
short correct = (short)func.i_ff(s[j], 0.0f);
|
||||
short correct2 = (short)func.i_ff(s[j], -0.0f);
|
||||
if (correct == q[j] || correct2 == q[j]) continue;
|
||||
}
|
||||
}
|
||||
|
||||
short err = t[j] - q[j];
|
||||
if (q[j] > t[j]) err = q[j] - t[j];
|
||||
vlog_error(
|
||||
"\nERROR: %s: %d ulp error at {%a (0x%04x), %a "
|
||||
"(0x%04x)}\nExpected: 0x%04x \nActual: 0x%04x (index: %d)\n",
|
||||
name, err, s[j], p[j], s2[j], p2[j], t[j], q[j], j);
|
||||
error = -1;
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
for (k = std::max(1U, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
|
||||
k++)
|
||||
{
|
||||
q = out[k];
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (-t[j] != q[j])
|
||||
{
|
||||
if (ftz)
|
||||
{
|
||||
if (IsHalfSubnormal(p[j]))
|
||||
{
|
||||
if (IsHalfSubnormal(p2[j]))
|
||||
{
|
||||
short correct = (short)-func.i_ff(0.0f, 0.0f);
|
||||
short correct2 = (short)-func.i_ff(0.0f, -0.0f);
|
||||
short correct3 = (short)-func.i_ff(-0.0f, 0.0f);
|
||||
short correct4 = (short)-func.i_ff(-0.0f, -0.0f);
|
||||
|
||||
if (correct == q[j] || correct2 == q[j]
|
||||
|| correct3 == q[j] || correct4 == q[j])
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
short correct = (short)-func.i_ff(0.0f, s2[j]);
|
||||
short correct2 = (short)-func.i_ff(-0.0f, s2[j]);
|
||||
if (correct == q[j] || correct2 == q[j]) continue;
|
||||
}
|
||||
}
|
||||
else if (IsHalfSubnormal(p2[j]))
|
||||
{
|
||||
short correct = (short)-func.i_ff(s[j], 0.0f);
|
||||
short correct2 = (short)-func.i_ff(s[j], -0.0f);
|
||||
if (correct == q[j] || correct2 == q[j]) continue;
|
||||
}
|
||||
}
|
||||
|
||||
cl_ushort err = -t[j] - q[j];
|
||||
if (q[j] > -t[j]) err = q[j] + t[j];
|
||||
vlog_error("\nERROR: %s: %d ulp error at {%a (0x%04x), %a "
|
||||
"(0x%04x)}\nExpected: 0x%04x \nActual: 0x%04x "
|
||||
"(index: %d)\n",
|
||||
name, err, s[j], p[j], s2[j], p2[j], -t[j], q[j], j);
|
||||
error = -1;
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||
j, error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||
|
||||
|
||||
if (0 == (base & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
|
||||
"ThreadCount:%2u\n",
|
||||
base, job->step, job->scale, buffer_elements,
|
||||
job->threadCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestMacro_Int_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfoBase test_info_base;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info_base, 0, sizeof(test_info_base));
|
||||
TestInfo test_info(test_info_base);
|
||||
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_half));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
|
||||
test_info.tinfo.resize(test_info.threadCount);
|
||||
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = { i * test_info.subBufferSize
|
||||
* sizeof(cl_half),
|
||||
test_info.subBufferSize * sizeof(cl_half) };
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
test_info.tinfo[i].inBuf2 =
|
||||
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
|
||||
"for region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode };
|
||||
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info);
|
||||
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||
|
||||
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
427
test_conformance/math_brute_force/macro_unary_half.cpp
Normal file
427
test_conformance/math_brute_force/macro_unary_half.cpp
Normal file
@@ -0,0 +1,427 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetUnaryKernel(kernel_name, builtin, ParameterType::Short,
|
||||
ParameterType::Half, vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
struct ThreadInfo
|
||||
{
|
||||
clMemWrapper inBuf; // input buffer for the thread
|
||||
clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
clCommandQueueWrapper
|
||||
tQueue; // per thread command queue to improve performance
|
||||
};
|
||||
|
||||
struct TestInfoBase
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
};
|
||||
|
||||
struct TestInfo : public TestInfoBase
|
||||
{
|
||||
TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
|
||||
|
||||
// Array of thread specific information
|
||||
std::vector<ThreadInfo> tinfo;
|
||||
|
||||
// Programs for various vector sizes.
|
||||
Programs programs;
|
||||
|
||||
// Thread-specific kernels for each vector size:
|
||||
// k[vector_size][thread_id]
|
||||
KernelMatrix k;
|
||||
};
|
||||
|
||||
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
TestInfo *job = (TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
size_t buffer_size = buffer_elements * sizeof(cl_half);
|
||||
cl_uint scale = job->scale;
|
||||
cl_uint base = job_id * (cl_uint)job->step;
|
||||
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||
fptr func = job->f->func;
|
||||
int ftz = job->ftz;
|
||||
cl_uint j, k;
|
||||
cl_int error = CL_SUCCESS;
|
||||
const char *name = job->f->name;
|
||||
std::vector<float> s(0);
|
||||
|
||||
int signbit_test = 0;
|
||||
if (!strcmp(name, "signbit")) signbit_test = 1;
|
||||
|
||||
#define ref_func(s) (signbit_test ? func.i_f_f(s) : func.i_f(s))
|
||||
|
||||
// start the map of the output arrays
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_short *out[VECTOR_SIZE_COUNT];
|
||||
|
||||
if (gHostFill)
|
||||
{
|
||||
// start the map of the output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_short *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
buffer_size, 0, NULL, e + j, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||
}
|
||||
|
||||
// Write the new values to the input array
|
||||
cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
buffer_size, p, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if (gHostFill)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
{
|
||||
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clReleaseEvent(e[j])))
|
||||
{
|
||||
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry
|
||||
// over
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||
&pattern, sizeof(pattern), 0,
|
||||
buffer_size, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||
}
|
||||
|
||||
// run the kernel
|
||||
size_t vectorCount =
|
||||
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||
// own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||
&tinfo->outBuf[j])))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||
&tinfo->inBuf)))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||
&vectorCount, NULL, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||
|
||||
if (gSkipCorrectnessTesting) return CL_SUCCESS;
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
cl_short *r = (cl_short *)gOut_Ref + thread_id * buffer_elements;
|
||||
cl_short *t = (cl_short *)r;
|
||||
s.resize(buffer_elements);
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
s[j] = cl_half_to_float(p[j]);
|
||||
if (!strcmp(name, "isnormal"))
|
||||
{
|
||||
if ((IsHalfSubnormal(p[j]) == 0) && !((p[j] & 0x7fffU) >= 0x7c00U)
|
||||
&& ((p[j] & 0x7fffU) != 0x0000U))
|
||||
r[j] = 1;
|
||||
else
|
||||
r[j] = 0;
|
||||
}
|
||||
else
|
||||
r[j] = (short)ref_func(s[j]);
|
||||
}
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is
|
||||
// an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (cl_short *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
|
||||
buffer_size, 0, NULL, NULL, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
// Wait for the last buffer
|
||||
out[j] = (cl_short *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||
CL_TRUE, CL_MAP_READ, 0,
|
||||
buffer_size, 0, NULL, NULL, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Verify data
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
cl_short *q = out[0];
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (gMinVectorSizeIndex == 0 && t[j] != q[j])
|
||||
{
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (ftz)
|
||||
{
|
||||
if (IsHalfSubnormal(p[j]))
|
||||
{
|
||||
short correct = (short)ref_func(+0.0f);
|
||||
short correct2 = (short)ref_func(-0.0f);
|
||||
if (correct == q[j] || correct2 == q[j]) continue;
|
||||
}
|
||||
}
|
||||
|
||||
short err = t[j] - q[j];
|
||||
if (q[j] > t[j]) err = q[j] - t[j];
|
||||
vlog_error("\nERROR: %s: %d ulp error at %a (0x%04x)\nExpected: "
|
||||
"%d vs. %d\n",
|
||||
name, err, s[j], p[j], t[j], q[j]);
|
||||
error = -1;
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
for (k = std::max(1U, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
|
||||
k++)
|
||||
{
|
||||
q = out[k];
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (-t[j] != q[j])
|
||||
{
|
||||
if (ftz)
|
||||
{
|
||||
if (IsHalfSubnormal(p[j]))
|
||||
{
|
||||
short correct = (short)-ref_func(+0.0f);
|
||||
short correct2 = (short)-ref_func(-0.0f);
|
||||
if (correct == q[j] || correct2 == q[j]) continue;
|
||||
}
|
||||
}
|
||||
|
||||
short err = -t[j] - q[j];
|
||||
if (q[j] > -t[j]) err = q[j] + t[j];
|
||||
vlog_error("\nERROR: %s%s: %d ulp error at %a "
|
||||
"(0x%04x)\nExpected: %d \nActual: %d\n",
|
||||
name, sizeNames[k], err, s[j], p[j], -t[j], q[j]);
|
||||
error = -1;
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||
j, error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||
|
||||
if (0 == (base & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
|
||||
"ThreadCount:%2u\n",
|
||||
base, job->step, job->scale, buffer_elements,
|
||||
job->threadCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestMacro_Int_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfoBase test_info_base;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
// Init test_info
|
||||
memset(&test_info_base, 0, sizeof(test_info_base));
|
||||
TestInfo test_info(test_info_base);
|
||||
|
||||
test_info.threadCount = GetThreadCount();
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_half));
|
||||
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount =
|
||||
std::max((cl_uint)1,
|
||||
(cl_uint)((1ULL << sizeof(cl_half) * 8) / test_info.step));
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
|
||||
test_info.tinfo.resize(test_info.threadCount);
|
||||
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = { i * test_info.subBufferSize
|
||||
* sizeof(cl_half),
|
||||
test_info.subBufferSize * sizeof(cl_half) };
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
|
||||
"for region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode };
|
||||
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info);
|
||||
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||
|
||||
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
201
test_conformance/math_brute_force/mad_half.cpp
Normal file
201
test_conformance/math_brute_force/mad_half.cpp
Normal file
@@ -0,0 +1,201 @@
|
||||
//
|
||||
// Copyright (c) 2023 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetTernaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||
ParameterType::Half, ParameterType::Half,
|
||||
ParameterType::Half, vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_mad_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
Programs programs;
|
||||
KernelMatrix kernels;
|
||||
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||
float maxError = 0.0f;
|
||||
|
||||
float maxErrorVal = 0.0f;
|
||||
float maxErrorVal2 = 0.0f;
|
||||
float maxErrorVal3 = 0.0f;
|
||||
size_t bufferSize = BUFFER_SIZE;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
uint64_t step = getTestStep(sizeof(cl_half), bufferSize);
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = { 1, kernels, programs, f->nameInCode };
|
||||
if ((error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
return error;
|
||||
}
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_ushort *p = (cl_ushort *)gIn;
|
||||
cl_ushort *p2 = (cl_ushort *)gIn2;
|
||||
cl_ushort *p3 = (cl_ushort *)gIn3;
|
||||
for (size_t j = 0; j < bufferSize / sizeof(cl_ushort); j++)
|
||||
{
|
||||
p[j] = (cl_ushort)genrand_int32(d);
|
||||
p2[j] = (cl_ushort)genrand_int32(d);
|
||||
p3[j] = (cl_ushort)genrand_int32(d);
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
bufferSize, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
|
||||
bufferSize, gIn3, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||
CL_FALSE, 0, BUFFER_SIZE,
|
||||
gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||
error, j);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||
sizeof(pattern), 0, BUFFER_SIZE, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_half) * sizeValues[j];
|
||||
size_t localCount = (bufferSize + vectorSize - 1)
|
||||
/ vectorSize; // bufferSize / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
|
||||
sizeof(gInBuffer), &gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
|
||||
sizeof(gInBuffer3), &gInBuffer3)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||
1, NULL, &localCount, NULL, 0,
|
||||
NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
||||
|
||||
// Read the data back
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
bufferSize, gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("ReadArray failed %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if (gSkipCorrectnessTesting) break;
|
||||
|
||||
// Verify data - no verification possible. MAD is a random number
|
||||
// generator.
|
||||
|
||||
if (0 == (i & 0x0fffffff))
|
||||
{
|
||||
vlog(".");
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("pass");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
|
||||
maxErrorVal3);
|
||||
}
|
||||
vlog("\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
@@ -49,6 +49,8 @@
|
||||
#include "harness/testHarness.h"
|
||||
|
||||
#define kPageSize 4096
|
||||
#define HALF_REQUIRED_FEATURES_1 (CL_FP_ROUND_TO_ZERO)
|
||||
#define HALF_REQUIRED_FEATURES_2 (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN)
|
||||
#define DOUBLE_REQUIRED_FEATURES \
|
||||
(CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO \
|
||||
| CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
|
||||
@@ -81,6 +83,8 @@ static int gTestFastRelaxed = 1;
|
||||
*/
|
||||
int gFastRelaxedDerived = 1;
|
||||
static int gToggleCorrectlyRoundedDivideSqrt = 0;
|
||||
int gHasHalf = 0;
|
||||
cl_device_fp_config gHalfCapabilities = 0;
|
||||
int gDeviceILogb0 = 1;
|
||||
int gDeviceILogbNaN = 1;
|
||||
int gCheckTininessBeforeRounding = 1;
|
||||
@@ -104,6 +108,8 @@ cl_device_fp_config gFloatCapabilities = 0;
|
||||
int gWimpyReductionFactor = 32;
|
||||
int gVerboseBruteForce = 0;
|
||||
|
||||
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
|
||||
|
||||
static int ParseArgs(int argc, const char **argv);
|
||||
static void PrintUsage(void);
|
||||
static void PrintFunctions(void);
|
||||
@@ -167,7 +173,6 @@ static int doTest(const char *name)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
if (0 == strcmp("ilogb", func_data->name))
|
||||
{
|
||||
@@ -236,6 +241,23 @@ static int doTest(const char *name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (gHasHalf && NULL != func_data->vtbl_ptr->HalfTestFunc)
|
||||
{
|
||||
gTestCount++;
|
||||
vlog("%3d: ", gTestCount);
|
||||
if (func_data->vtbl_ptr->HalfTestFunc(func_data, gMTdata,
|
||||
false /* relaxed mode*/))
|
||||
{
|
||||
gFailCount++;
|
||||
error++;
|
||||
if (gStopOnError)
|
||||
{
|
||||
gSkipRestOfTests = true;
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return error;
|
||||
@@ -408,6 +430,8 @@ static int ParseArgs(int argc, const char **argv)
|
||||
|
||||
case 'm': singleThreaded ^= 1; break;
|
||||
|
||||
case 'g': gHasHalf ^= 1; break;
|
||||
|
||||
case 'r': gTestFastRelaxed ^= 1; break;
|
||||
|
||||
case 's': gStopOnError ^= 1; break;
|
||||
@@ -540,6 +564,8 @@ static void PrintUsage(void)
|
||||
vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
|
||||
"on)\n");
|
||||
vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
|
||||
vlog("\t\t-g\tToggle half precision testing. (Default: on if khr_fp_16 "
|
||||
"on)\n");
|
||||
vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
|
||||
vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
|
||||
"precision. (Default: on)\n");
|
||||
@@ -640,6 +666,54 @@ test_status InitCL(cl_device_id device)
|
||||
#endif
|
||||
}
|
||||
|
||||
gFloatToHalfRoundingMode = kRoundToNearestEven;
|
||||
if (is_extension_available(gDevice, "cl_khr_fp16"))
|
||||
{
|
||||
gHasHalf ^= 1;
|
||||
#if defined(CL_DEVICE_HALF_FP_CONFIG)
|
||||
if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_HALF_FP_CONFIG,
|
||||
sizeof(gHalfCapabilities),
|
||||
&gHalfCapabilities, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"ERROR: Unable to get device CL_DEVICE_HALF_FP_CONFIG. (%d)\n",
|
||||
error);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
if (HALF_REQUIRED_FEATURES_1
|
||||
!= (gHalfCapabilities & HALF_REQUIRED_FEATURES_1)
|
||||
&& HALF_REQUIRED_FEATURES_2
|
||||
!= (gHalfCapabilities & HALF_REQUIRED_FEATURES_2))
|
||||
{
|
||||
char list[300] = "";
|
||||
if (0 == (gHalfCapabilities & CL_FP_ROUND_TO_NEAREST))
|
||||
strncat(list, "CL_FP_ROUND_TO_NEAREST, ", sizeof(list) - 1);
|
||||
if (0 == (gHalfCapabilities & CL_FP_ROUND_TO_ZERO))
|
||||
strncat(list, "CL_FP_ROUND_TO_ZERO, ", sizeof(list) - 1);
|
||||
if (0 == (gHalfCapabilities & CL_FP_INF_NAN))
|
||||
strncat(list, "CL_FP_INF_NAN, ", sizeof(list) - 1);
|
||||
vlog_error("ERROR: required half features are missing: %s\n", list);
|
||||
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
if ((gHalfCapabilities & CL_FP_ROUND_TO_NEAREST) != 0)
|
||||
{
|
||||
gHalfRoundingMode = CL_HALF_RTE;
|
||||
}
|
||||
else // due to above condition it must be RTZ
|
||||
{
|
||||
gHalfRoundingMode = CL_HALF_RTZ;
|
||||
}
|
||||
|
||||
#else
|
||||
vlog_error("FAIL: device says it supports cl_khr_fp16 but "
|
||||
"CL_DEVICE_HALF_FP_CONFIG is not in the headers!\n");
|
||||
return TEST_FAIL;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
uint32_t deviceFrequency = 0;
|
||||
size_t configSize = sizeof(deviceFrequency);
|
||||
if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
|
||||
@@ -828,6 +902,7 @@ test_status InitCL(cl_device_id device)
|
||||
"Bruteforce_Ulp_Error_Double() for more details.\n\n");
|
||||
}
|
||||
|
||||
vlog("\tTesting half precision? %s\n", no_yes[0 != gHasHalf]);
|
||||
vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
|
||||
if (gIsEmbedded)
|
||||
vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
|
||||
|
||||
@@ -4699,6 +4699,49 @@ double reference_nextafter(double xx, double yy)
|
||||
return a.f;
|
||||
}
|
||||
|
||||
cl_half reference_nanh(cl_ushort x)
|
||||
{
|
||||
cl_ushort u;
|
||||
cl_half h;
|
||||
u = x | 0x7e00U;
|
||||
memcpy(&h, &u, sizeof(cl_half));
|
||||
return h;
|
||||
}
|
||||
|
||||
float reference_nextafterh(float xx, float yy, bool allow_denorms)
|
||||
{
|
||||
cl_half tmp_a = cl_half_from_float(xx, CL_HALF_RTE);
|
||||
cl_half tmp_b = cl_half_from_float(yy, CL_HALF_RTE);
|
||||
float x = cl_half_to_float(tmp_a);
|
||||
float y = cl_half_to_float(tmp_b);
|
||||
|
||||
// take care of nans
|
||||
if (x != x) return x;
|
||||
|
||||
if (y != y) return y;
|
||||
|
||||
if (x == y) return y;
|
||||
|
||||
short a_h = cl_half_from_float(x, CL_HALF_RTE);
|
||||
short b_h = cl_half_from_float(y, CL_HALF_RTE);
|
||||
short oa_h = a_h;
|
||||
|
||||
if (a_h & 0x8000) a_h = 0x8000 - a_h;
|
||||
if (b_h & 0x8000) b_h = 0x8000 - b_h;
|
||||
|
||||
a_h += (a_h < b_h) ? 1 : -1;
|
||||
a_h = (a_h < 0) ? (cl_short)0x8000 - a_h : a_h;
|
||||
|
||||
if (!allow_denorms && IsHalfSubnormal(a_h))
|
||||
{
|
||||
if (cl_half_to_float(0x7fff & oa_h) < cl_half_to_float(0x7fff & a_h))
|
||||
a_h = (a_h & 0x8000) ? 0x8400 : 0x0400;
|
||||
else
|
||||
a_h = 0;
|
||||
}
|
||||
|
||||
return cl_half_to_float(a_h);
|
||||
}
|
||||
|
||||
long double reference_nextafterl(long double xx, long double yy)
|
||||
{
|
||||
|
||||
@@ -18,8 +18,10 @@
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <OpenCL/opencl.h>
|
||||
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#include "CL/cl_half.h"
|
||||
#endif
|
||||
|
||||
// -- for testing float --
|
||||
@@ -160,6 +162,8 @@ long double reference_fractl(long double, long double*);
|
||||
long double reference_fmal(long double, long double, long double);
|
||||
long double reference_madl(long double, long double, long double);
|
||||
long double reference_nextafterl(long double, long double);
|
||||
float reference_nextafterh(float, float, bool allow_denormals = true);
|
||||
cl_half reference_nanh(cl_ushort);
|
||||
long double reference_recipl(long double);
|
||||
long double reference_rootnl(long double, int);
|
||||
long double reference_rsqrtl(long double);
|
||||
|
||||
@@ -108,8 +108,7 @@ const double specialValues[] = {
|
||||
+0.0,
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
constexpr size_t specialValuesCount = ARRAY_SIZE(specialValues);
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
|
||||
@@ -118,8 +118,7 @@ const float specialValues[] = {
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesCount =
|
||||
sizeof(specialValues) / sizeof(specialValues[0]);
|
||||
constexpr size_t specialValuesCount = ARRAY_SIZE(specialValues);
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
|
||||
777
test_conformance/math_brute_force/ternary_half.cpp
Normal file
777
test_conformance/math_brute_force/ternary_half.cpp
Normal file
@@ -0,0 +1,777 @@
|
||||
//
|
||||
// Copyright (c) 2023 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cstring>
|
||||
|
||||
#define CORRECTLY_ROUNDED 0
|
||||
#define FLUSHED 1
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetTernaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||
ParameterType::Half, ParameterType::Half,
|
||||
ParameterType::Half, vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const cl_half specialValuesHalf[] = {
|
||||
0xffff, 0x0000, 0x0001, 0x7c00, /*INFINITY*/
|
||||
0xfc00, /*-INFINITY*/
|
||||
0x8000, /*-0*/
|
||||
0x7bff, /*HALF_MAX*/
|
||||
0x0400, /*HALF_MIN*/
|
||||
0x03ff, /* Largest denormal */
|
||||
0x3c00, /* 1 */
|
||||
0xbc00, /* -1 */
|
||||
0x3555, /*nearest value to 1/3*/
|
||||
0x3bff, /*largest number less than one*/
|
||||
0xc000, /* -2 */
|
||||
0xfbff, /* -HALF_MAX */
|
||||
0x8400, /* -HALF_MIN */
|
||||
0x4248, /* M_PI_H */
|
||||
0xc248, /* -M_PI_H */
|
||||
0xbbff, /* Largest negative fraction */
|
||||
};
|
||||
|
||||
constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
|
||||
Programs programs;
|
||||
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||
KernelMatrix kernels;
|
||||
float maxError = 0.0f;
|
||||
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
float maxErrorVal = 0.0f;
|
||||
float maxErrorVal2 = 0.0f;
|
||||
float maxErrorVal3 = 0.0f;
|
||||
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||
|
||||
constexpr size_t bufferElements = BUFFER_SIZE / sizeof(cl_half);
|
||||
|
||||
cl_uchar overflow[bufferElements];
|
||||
float half_ulps = f->half_ulps;
|
||||
int skipNanInf = (0 == strcmp("fma", f->nameInCode));
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
|
||||
// Init the kernels
|
||||
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
|
||||
if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
return error;
|
||||
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_half *hp0 = (cl_half *)gIn;
|
||||
cl_half *hp1 = (cl_half *)gIn2;
|
||||
cl_half *hp2 = (cl_half *)gIn3;
|
||||
size_t idx = 0;
|
||||
|
||||
if (i == 0)
|
||||
{ // test edge cases
|
||||
uint32_t x, y, z;
|
||||
x = y = z = 0;
|
||||
for (; idx < bufferElements; idx++)
|
||||
{
|
||||
hp0[idx] = specialValuesHalf[x];
|
||||
hp1[idx] = specialValuesHalf[y];
|
||||
hp2[idx] = specialValuesHalf[z];
|
||||
|
||||
if (++x >= specialValuesHalfCount)
|
||||
{
|
||||
x = 0;
|
||||
if (++y >= specialValuesHalfCount)
|
||||
{
|
||||
y = 0;
|
||||
if (++z >= specialValuesHalfCount) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (idx == bufferElements)
|
||||
vlog_error("Test Error: not all special cases tested!\n");
|
||||
}
|
||||
|
||||
auto any_value = [&d]() {
|
||||
float t = (float)((double)genrand_int32(d) / (double)0xFFFFFFFF);
|
||||
return HFF((1.0f - t) * CL_HALF_MIN + t * CL_HALF_MAX);
|
||||
};
|
||||
|
||||
for (; idx < bufferElements; idx++)
|
||||
{
|
||||
hp0[idx] = any_value();
|
||||
hp1[idx] = any_value();
|
||||
hp2[idx] = any_value();
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn3, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Write garbage into output arrays
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||
CL_FALSE, 0, BUFFER_SIZE,
|
||||
gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||
error, j);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||
sizeof(pattern), 0, BUFFER_SIZE, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_half) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
|
||||
sizeof(gInBuffer), &gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
|
||||
sizeof(gInBuffer3), &gInBuffer3)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||
1, NULL, &localCount, NULL, 0,
|
||||
NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(gQueue)))
|
||||
{
|
||||
vlog("clFlush failed\n");
|
||||
return error;
|
||||
}
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
cl_half *res = (cl_half *)gOut_Ref;
|
||||
if (skipNanInf)
|
||||
{
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
{
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
res[j] = HFF((float)f->func.f_fma(
|
||||
HTF(hp0[j]), HTF(hp1[j]), HTF(hp2[j]), CORRECTLY_ROUNDED));
|
||||
overflow[j] =
|
||||
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
res[j] = HFF((float)f->func.f_fma(
|
||||
HTF(hp0[j]), HTF(hp1[j]), HTF(hp2[j]), CORRECTLY_ROUNDED));
|
||||
}
|
||||
|
||||
// Read the data back
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("ReadArray failed %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if (gSkipCorrectnessTesting) break;
|
||||
|
||||
// Verify data
|
||||
uint16_t *t = (uint16_t *)gOut_Ref;
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
{
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
uint16_t *q = (uint16_t *)(gOut[k]);
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (t[j] != q[j])
|
||||
{
|
||||
int fail;
|
||||
cl_half test = ((cl_half *)q)[j];
|
||||
float ref1 = f->func.f_fma(HTF(hp0[j]), HTF(hp1[j]),
|
||||
HTF(hp2[j]), CORRECTLY_ROUNDED);
|
||||
cl_half correct = HFF(ref1);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input
|
||||
// or output is a infinity or NaN or overflow
|
||||
if (skipNanInf)
|
||||
{
|
||||
if (overflow[j] || IsHalfInfinity(correct)
|
||||
|| IsHalfNaN(correct) || IsHalfInfinity(hp0[j])
|
||||
|| IsHalfNaN(hp0[j]) || IsHalfInfinity(hp1[j])
|
||||
|| IsHalfNaN(hp1[j]) || IsHalfInfinity(hp2[j])
|
||||
|| IsHalfNaN(hp2[j]))
|
||||
continue;
|
||||
}
|
||||
|
||||
float err =
|
||||
test != correct ? Ulp_Error_Half(test, ref1) : 0.f;
|
||||
fail = !(fabsf(err) <= half_ulps);
|
||||
|
||||
if (fail && ftz)
|
||||
{
|
||||
// retry per section 6.5.3.2 with flushing on
|
||||
if (0.0f == test
|
||||
&& 0.0f
|
||||
== f->func.f_fma(HTF(hp0[j]), HTF(hp1[j]),
|
||||
HTF(hp2[j]), FLUSHED))
|
||||
{
|
||||
fail = 0;
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
// retry per section 6.5.3.3
|
||||
if (fail && IsHalfSubnormal(hp0[j]))
|
||||
{ // look at me,
|
||||
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||
|
||||
float ref2 =
|
||||
f->func.f_fma(0.0f, HTF(hp1[j]), HTF(hp2[j]),
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct2 = HFF(ref2);
|
||||
float ref3 =
|
||||
f->func.f_fma(-0.0f, HTF(hp1[j]), HTF(hp2[j]),
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct3 = HFF(ref3);
|
||||
|
||||
if (skipNanInf)
|
||||
{
|
||||
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsHalfInfinity(correct2)
|
||||
|| IsHalfNaN(correct2)
|
||||
|| IsHalfInfinity(correct3)
|
||||
|| IsHalfNaN(correct3))
|
||||
continue;
|
||||
}
|
||||
|
||||
float err2 = test != correct2
|
||||
? Ulp_Error_Half(test, ref2)
|
||||
: 0.f;
|
||||
float err3 = test != correct3
|
||||
? Ulp_Error_Half(test, ref3)
|
||||
: 0.f;
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= half_ulps))
|
||||
&& (!(fabsf(err3) <= half_ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (0.0f == test
|
||||
&& (0.0f
|
||||
== f->func.f_fma(0.0f, HTF(hp1[j]),
|
||||
HTF(hp2[j]), FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(-0.0f, HTF(hp1[j]),
|
||||
HTF(hp2[j]), FLUSHED)))
|
||||
{
|
||||
fail = 0;
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
// try with first two args as zero
|
||||
if (IsHalfSubnormal(hp1[j]))
|
||||
{ // its fun to have fun,
|
||||
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||
|
||||
ref2 = f->func.f_fma(0.0f, 0.0f, HTF(hp2[j]),
|
||||
CORRECTLY_ROUNDED);
|
||||
correct2 = HFF(ref2);
|
||||
ref3 = f->func.f_fma(-0.0f, 0.0f, HTF(hp2[j]),
|
||||
CORRECTLY_ROUNDED);
|
||||
correct3 = HFF(ref3);
|
||||
float ref4 =
|
||||
f->func.f_fma(0.0f, -0.0f, HTF(hp2[j]),
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct4 = HFF(ref4);
|
||||
float ref5 =
|
||||
f->func.f_fma(-0.0f, -0.0f, HTF(hp2[j]),
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct5 = HFF(ref5);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result
|
||||
// if an input or output is a infinity or NaN or
|
||||
// overflow
|
||||
if (!gInfNanSupport)
|
||||
{
|
||||
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsHalfInfinity(correct2)
|
||||
|| IsHalfNaN(correct2)
|
||||
|| IsHalfInfinity(correct3)
|
||||
|| IsHalfNaN(correct3)
|
||||
|| IsHalfInfinity(correct4)
|
||||
|| IsHalfNaN(correct4)
|
||||
|| IsHalfInfinity(correct5)
|
||||
|| IsHalfNaN(correct5))
|
||||
continue;
|
||||
}
|
||||
|
||||
err2 = test != correct2
|
||||
? Ulp_Error_Half(test, ref2)
|
||||
: 0.f;
|
||||
err3 = test != correct3
|
||||
? Ulp_Error_Half(test, ref3)
|
||||
: 0.f;
|
||||
float err4 = test != correct4
|
||||
? Ulp_Error_Half(test, ref4)
|
||||
: 0.f;
|
||||
float err5 = test != correct5
|
||||
? Ulp_Error_Half(test, ref5)
|
||||
: 0.f;
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= half_ulps))
|
||||
&& (!(fabsf(err3) <= half_ulps))
|
||||
&& (!(fabsf(err4) <= half_ulps))
|
||||
&& (!(fabsf(err5) <= half_ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (0.0f == test
|
||||
&& (0.0f
|
||||
== f->func.f_fma(0.0f, 0.0f,
|
||||
HTF(hp2[j]),
|
||||
FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(-0.0f, 0.0f,
|
||||
HTF(hp2[j]),
|
||||
FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(0.0f, -0.0f,
|
||||
HTF(hp2[j]),
|
||||
FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(-0.0f, -0.0f,
|
||||
HTF(hp2[j]),
|
||||
FLUSHED)))
|
||||
{
|
||||
fail = 0;
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
if (IsHalfSubnormal(hp2[j]))
|
||||
{
|
||||
if (test == 0.0f) // 0*0+0 is 0
|
||||
{
|
||||
fail = 0;
|
||||
err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (IsHalfSubnormal(hp2[j]))
|
||||
{
|
||||
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||
|
||||
ref2 = f->func.f_fma(0.0f, HTF(hp1[j]), 0.0f,
|
||||
CORRECTLY_ROUNDED);
|
||||
correct2 = HFF(ref2);
|
||||
ref3 = f->func.f_fma(-0.0f, HTF(hp1[j]), 0.0f,
|
||||
CORRECTLY_ROUNDED);
|
||||
correct3 = HFF(ref3);
|
||||
float ref4 =
|
||||
f->func.f_fma(0.0f, HTF(hp1[j]), -0.0f,
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct4 = HFF(ref4);
|
||||
float ref5 =
|
||||
f->func.f_fma(-0.0f, HTF(hp1[j]), -0.0f,
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct5 = HFF(ref5);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result
|
||||
// if an input or output is a infinity or NaN or
|
||||
// overflow
|
||||
if (!gInfNanSupport)
|
||||
{
|
||||
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsHalfInfinity(correct2)
|
||||
|| IsHalfNaN(correct2)
|
||||
|| IsHalfInfinity(correct3)
|
||||
|| IsHalfNaN(correct3)
|
||||
|| IsHalfInfinity(correct4)
|
||||
|| IsHalfNaN(correct4)
|
||||
|| IsHalfInfinity(correct5)
|
||||
|| IsHalfNaN(correct5))
|
||||
continue;
|
||||
}
|
||||
|
||||
err2 = test != correct2
|
||||
? Ulp_Error_Half(test, ref2)
|
||||
: 0.f;
|
||||
err3 = test != correct3
|
||||
? Ulp_Error_Half(test, ref3)
|
||||
: 0.f;
|
||||
float err4 = test != correct4
|
||||
? Ulp_Error_Half(test, ref4)
|
||||
: 0.f;
|
||||
float err5 = test != correct5
|
||||
? Ulp_Error_Half(test, ref5)
|
||||
: 0.f;
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= half_ulps))
|
||||
&& (!(fabsf(err3) <= half_ulps))
|
||||
&& (!(fabsf(err4) <= half_ulps))
|
||||
&& (!(fabsf(err5) <= half_ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (0.0f == test
|
||||
&& (0.0f
|
||||
== f->func.f_fma(0.0f, HTF(hp1[j]),
|
||||
0.0f, FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(-0.0f, HTF(hp1[j]),
|
||||
0.0f, FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(0.0f, HTF(hp1[j]),
|
||||
-0.0f, FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(-0.0f, HTF(hp1[j]),
|
||||
-0.0f, FLUSHED)))
|
||||
{
|
||||
fail = 0;
|
||||
err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (fail && IsHalfSubnormal(hp1[j]))
|
||||
{
|
||||
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||
|
||||
float ref2 =
|
||||
f->func.f_fma(HTF(hp0[j]), 0.0f, HTF(hp2[j]),
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct2 = HFF(ref2);
|
||||
float ref3 =
|
||||
f->func.f_fma(HTF(hp0[j]), -0.0f, HTF(hp2[j]),
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct3 = HFF(ref3);
|
||||
|
||||
if (skipNanInf)
|
||||
{
|
||||
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsHalfInfinity(correct2)
|
||||
|| IsHalfNaN(correct2)
|
||||
|| IsHalfInfinity(correct3)
|
||||
|| IsHalfNaN(correct3))
|
||||
continue;
|
||||
}
|
||||
|
||||
float err2 = test != correct2
|
||||
? Ulp_Error_Half(test, ref2)
|
||||
: 0.f;
|
||||
float err3 = test != correct3
|
||||
? Ulp_Error_Half(test, ref3)
|
||||
: 0.f;
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= half_ulps))
|
||||
&& (!(fabsf(err3) <= half_ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (0.0f == test
|
||||
&& (0.0f
|
||||
== f->func.f_fma(HTF(hp0[j]), 0.0f,
|
||||
HTF(hp2[j]), FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(HTF(hp0[j]), -0.0f,
|
||||
HTF(hp2[j]), FLUSHED)))
|
||||
{
|
||||
fail = 0;
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
// try with second two args as zero
|
||||
if (IsHalfSubnormal(hp2[j]))
|
||||
{
|
||||
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||
|
||||
ref2 = f->func.f_fma(HTF(hp0[j]), 0.0f, 0.0f,
|
||||
CORRECTLY_ROUNDED);
|
||||
correct2 = HFF(ref2);
|
||||
ref3 = f->func.f_fma(HTF(hp0[j]), -0.0f, 0.0f,
|
||||
CORRECTLY_ROUNDED);
|
||||
correct3 = HFF(ref3);
|
||||
float ref4 =
|
||||
f->func.f_fma(HTF(hp0[j]), 0.0f, -0.0f,
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct4 = HFF(ref4);
|
||||
float ref5 =
|
||||
f->func.f_fma(HTF(hp0[j]), -0.0f, -0.0f,
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct5 = HFF(ref5);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result
|
||||
// if an input or output is a infinity or NaN or
|
||||
// overflow
|
||||
if (!gInfNanSupport)
|
||||
{
|
||||
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsHalfInfinity(correct2)
|
||||
|| IsHalfNaN(correct2)
|
||||
|| IsHalfInfinity(correct3)
|
||||
|| IsHalfNaN(correct3)
|
||||
|| IsHalfInfinity(correct4)
|
||||
|| IsHalfNaN(correct4)
|
||||
|| IsHalfInfinity(correct5)
|
||||
|| IsHalfNaN(correct5))
|
||||
continue;
|
||||
}
|
||||
|
||||
err2 = test != correct2
|
||||
? Ulp_Error_Half(test, ref2)
|
||||
: 0.f;
|
||||
err3 = test != correct3
|
||||
? Ulp_Error_Half(test, ref3)
|
||||
: 0.f;
|
||||
float err4 = test != correct4
|
||||
? Ulp_Error_Half(test, ref4)
|
||||
: 0.f;
|
||||
float err5 = test != correct5
|
||||
? Ulp_Error_Half(test, ref5)
|
||||
: 0.f;
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= half_ulps))
|
||||
&& (!(fabsf(err3) <= half_ulps))
|
||||
&& (!(fabsf(err4) <= half_ulps))
|
||||
&& (!(fabsf(err5) <= half_ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (0.0f == test
|
||||
&& (0.0f
|
||||
== f->func.f_fma(HTF(hp0[j]), 0.0f,
|
||||
0.0f, FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(HTF(hp0[j]), -0.0f,
|
||||
0.0f, FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(HTF(hp0[j]), 0.0f,
|
||||
-0.0f, FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(HTF(hp0[j]), -0.0f,
|
||||
-0.0f, FLUSHED)))
|
||||
{
|
||||
fail = 0;
|
||||
err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (fail && IsHalfSubnormal(hp2[j]))
|
||||
{
|
||||
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||
|
||||
float ref2 = f->func.f_fma(HTF(hp0[j]), HTF(hp1[j]),
|
||||
0.0f, CORRECTLY_ROUNDED);
|
||||
cl_half correct2 = HFF(ref2);
|
||||
float ref3 =
|
||||
f->func.f_fma(HTF(hp0[j]), HTF(hp1[j]), -0.0f,
|
||||
CORRECTLY_ROUNDED);
|
||||
cl_half correct3 = HFF(ref3);
|
||||
|
||||
if (skipNanInf)
|
||||
{
|
||||
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsHalfInfinity(correct2)
|
||||
|| IsHalfNaN(correct2)
|
||||
|| IsHalfInfinity(correct3)
|
||||
|| IsHalfNaN(correct3))
|
||||
continue;
|
||||
}
|
||||
|
||||
float err2 = test != correct2
|
||||
? Ulp_Error_Half(test, correct2)
|
||||
: 0.f;
|
||||
float err3 = test != correct3
|
||||
? Ulp_Error_Half(test, correct3)
|
||||
: 0.f;
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= half_ulps))
|
||||
&& (!(fabsf(err3) <= half_ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (0.0f == test
|
||||
&& (0.0f
|
||||
== f->func.f_fma(HTF(hp0[j]),
|
||||
HTF(hp1[j]), 0.0f,
|
||||
FLUSHED)
|
||||
|| 0.0f
|
||||
== f->func.f_fma(HTF(hp0[j]),
|
||||
HTF(hp1[j]), -0.0f,
|
||||
FLUSHED)))
|
||||
{
|
||||
fail = 0;
|
||||
err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fabsf(err) > maxError)
|
||||
{
|
||||
maxError = fabsf(err);
|
||||
maxErrorVal = HTF(hp0[j]);
|
||||
maxErrorVal2 = HTF(hp1[j]);
|
||||
maxErrorVal3 = HTF(hp2[j]);
|
||||
}
|
||||
|
||||
if (fail)
|
||||
{
|
||||
vlog_error(
|
||||
"\nERROR: %s%s: %f ulp error at {%a, %a, %a} "
|
||||
"({0x%4.4x, 0x%4.4x, 0x%4.4x}): *%a vs. %a\n",
|
||||
f->name, sizeNames[k], err, HTF(hp0[j]),
|
||||
HTF(hp1[j]), HTF(hp2[j]), hp0[j], hp1[j], hp2[j],
|
||||
HTF(res[j]), HTF(test));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (0 == (i & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n",
|
||||
i, step, BUFFER_SIZE);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
|
||||
maxErrorVal3);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
@@ -24,6 +24,9 @@ int TestFunc_Float_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
// double foo(double)
|
||||
int TestFunc_Double_Double(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// half foo(half)
|
||||
int TestFunc_Half_Half(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// int foo(float)
|
||||
int TestFunc_Int_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
@@ -36,6 +39,9 @@ int TestFunc_Float_UInt(const Func *f, MTdata, bool relaxedMode);
|
||||
// double foo(ulong)
|
||||
int TestFunc_Double_ULong(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// half (Ushort)
|
||||
int TestFunc_Half_UShort(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// Returns {0, 1} for scalar and {0, -1} for vector.
|
||||
// int foo(float)
|
||||
int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
@@ -44,21 +50,34 @@ int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
// int foo(double)
|
||||
int TestMacro_Int_Double(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// int foo(half,half)
|
||||
int TestMacro_Int_Half_Half(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// int foo(half)
|
||||
int TestMacro_Int_Half(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// int foo(half)
|
||||
int TestFunc_Int_Half(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// float foo(float, float)
|
||||
int TestFunc_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// double foo(double, double)
|
||||
int TestFunc_Double_Double_Double(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// Half foo(half, half)
|
||||
int TestFunc_Half_Half_Half(const Func *f, MTdata, bool relaxedMode);
|
||||
// Special handling for nextafter.
|
||||
// float foo(float, float)
|
||||
int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata,
|
||||
bool relaxedMode);
|
||||
// Half foo(Half, Half)
|
||||
int TestFunc_Half_Half_Half_nextafter(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// Half foo(Half, Half)
|
||||
int TestFunc_Half_Half_Half_common(const Func *f, MTdata, int isNextafter,
|
||||
bool relaxedMode);
|
||||
|
||||
// Half foo(Half, int)
|
||||
int TestFunc_Half_Half_Int(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// Special handling for nextafter.
|
||||
// double foo(double, double)
|
||||
int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata,
|
||||
bool relaxedMode);
|
||||
|
||||
// float op float
|
||||
int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata,
|
||||
@@ -68,6 +87,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata,
|
||||
int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata,
|
||||
bool relaxedMode);
|
||||
|
||||
// half op half
|
||||
int TestFunc_Half_Half_Half_Operator(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// float foo(float, int)
|
||||
int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
@@ -89,24 +111,36 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
int TestFunc_Double_Double_Double_Double(const Func *f, MTdata,
|
||||
bool relaxedMode);
|
||||
|
||||
// half foo(half, half, half)
|
||||
int TestFunc_Half_Half_Half_Half(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// float foo(float, float*)
|
||||
int TestFunc_Float2_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// double foo(double, double*)
|
||||
int TestFunc_Double2_Double(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// half foo(half, half*)
|
||||
int TestFunc_Half2_Half(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// float foo(float, int*)
|
||||
int TestFunc_FloatI_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// double foo(double, int*)
|
||||
int TestFunc_DoubleI_Double(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// half foo(half, int*)
|
||||
int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode);
|
||||
|
||||
// float foo(float, float, int*)
|
||||
int TestFunc_FloatI_Float_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// double foo(double, double, int*)
|
||||
int TestFunc_DoubleI_Double_Double(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// half foo(half, half, int*)
|
||||
int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode);
|
||||
|
||||
// Special handling for mad.
|
||||
// float mad(float, float, float)
|
||||
int TestFunc_mad_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
@@ -115,4 +149,7 @@ int TestFunc_mad_Float(const Func *f, MTdata, bool relaxedMode);
|
||||
// double mad(double, double, double)
|
||||
int TestFunc_mad_Double(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
// half mad(half, half, half)
|
||||
int TestFunc_mad_Half(const Func *f, MTdata, bool relaxedMode);
|
||||
|
||||
#endif
|
||||
|
||||
483
test_conformance/math_brute_force/unary_half.cpp
Normal file
483
test_conformance/math_brute_force/unary_half.cpp
Normal file
@@ -0,0 +1,483 @@
|
||||
//
|
||||
// Copyright (c) 2023 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||
ParameterType::Half, vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
// Thread specific data for a worker thread
|
||||
typedef struct ThreadInfo
|
||||
{
|
||||
clMemWrapper inBuf; // input buffer for the thread
|
||||
clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||
float maxError; // max error value. Init to 0.
|
||||
double maxErrorValue; // position of the max error value. Init to 0.
|
||||
clCommandQueueWrapper
|
||||
tQueue; // per thread command queue to improve performance
|
||||
} ThreadInfo;
|
||||
|
||||
struct TestInfoBase
|
||||
{
|
||||
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||
const Func *f; // A pointer to the function info
|
||||
cl_uint threadCount; // Number of worker threads
|
||||
cl_uint jobCount; // Number of jobs
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
float ulps; // max_allowed ulps
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
|
||||
int isRangeLimited; // 1 if the function is only to be evaluated over a
|
||||
// range
|
||||
float half_sin_cos_tan_limit;
|
||||
};
|
||||
|
||||
struct TestInfo : public TestInfoBase
|
||||
{
|
||||
TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
|
||||
|
||||
// Array of thread specific information
|
||||
std::vector<ThreadInfo> tinfo;
|
||||
|
||||
// Programs for various vector sizes.
|
||||
Programs programs;
|
||||
|
||||
// Thread-specific kernels for each vector size:
|
||||
// k[vector_size][thread_id]
|
||||
KernelMatrix k;
|
||||
};
|
||||
|
||||
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
{
|
||||
TestInfo *job = (TestInfo *)data;
|
||||
size_t buffer_elements = job->subBufferSize;
|
||||
size_t buffer_size = buffer_elements * sizeof(cl_half);
|
||||
cl_uint scale = job->scale;
|
||||
cl_uint base = job_id * (cl_uint)job->step;
|
||||
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||
float ulps = job->ulps;
|
||||
fptr func = job->f->func;
|
||||
cl_uint j, k;
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
int isRangeLimited = job->isRangeLimited;
|
||||
float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit;
|
||||
int ftz = job->ftz;
|
||||
|
||||
std::vector<float> s(0);
|
||||
|
||||
cl_event e[VECTOR_SIZE_COUNT];
|
||||
cl_ushort *out[VECTOR_SIZE_COUNT];
|
||||
|
||||
if (gHostFill)
|
||||
{
|
||||
// start the map of the output arrays
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (uint16_t *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||
buffer_size, 0, NULL, e + j, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||
}
|
||||
|
||||
// Write the new values to the input array
|
||||
cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
p[j] = base + j * scale;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||
buffer_size, p, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if (gHostFill)
|
||||
{
|
||||
// Wait for the map to finish
|
||||
if ((error = clWaitForEvents(1, e + j)))
|
||||
{
|
||||
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clReleaseEvent(e[j])))
|
||||
{
|
||||
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry
|
||||
// over
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||
&pattern, sizeof(pattern), 0,
|
||||
buffer_size, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||
}
|
||||
|
||||
// run the kernel
|
||||
size_t vectorCount =
|
||||
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||
// own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||
&tinfo->outBuf[j])))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||
&tinfo->inBuf)))
|
||||
{
|
||||
LogBuildError(program);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||
&vectorCount, NULL, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||
|
||||
if (gSkipCorrectnessTesting) return CL_SUCCESS;
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
cl_half *r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
|
||||
s.resize(buffer_elements);
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
s[j] = (float)cl_half_to_float(p[j]);
|
||||
r[j] = HFF(func.f_f(s[j]));
|
||||
}
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is
|
||||
// an in order queue.
|
||||
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
out[j] = (uint16_t *)clEnqueueMapBuffer(
|
||||
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
|
||||
buffer_size, 0, NULL, NULL, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
// Wait for the last buffer
|
||||
out[j] = (uint16_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||
CL_TRUE, CL_MAP_READ, 0,
|
||||
buffer_size, 0, NULL, NULL, &error);
|
||||
if (error || NULL == out[j])
|
||||
{
|
||||
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Verify data
|
||||
for (j = 0; j < buffer_elements; j++)
|
||||
{
|
||||
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_ushort *q = out[k];
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (r[j] != q[j])
|
||||
{
|
||||
float test = cl_half_to_float(q[j]);
|
||||
double correct = func.f_f(s[j]);
|
||||
float err = Ulp_Error_Half(q[j], correct);
|
||||
int fail = !(fabsf(err) <= ulps);
|
||||
|
||||
// half_sin/cos/tan are only valid between +-2**16, Inf, NaN
|
||||
if (isRangeLimited
|
||||
&& fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16)
|
||||
&& fabsf(s[j]) < INFINITY)
|
||||
{
|
||||
if (fabsf(test) <= half_sin_cos_tan_limit)
|
||||
{
|
||||
err = 0;
|
||||
fail = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (fail)
|
||||
{
|
||||
if (ftz)
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
if (IsHalfResultSubnormal(correct, ulps))
|
||||
{
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// retry per section 6.5.3.3
|
||||
if (IsHalfSubnormal(p[j]))
|
||||
{
|
||||
double correct2 = func.f_f(0.0);
|
||||
double correct3 = func.f_f(-0.0);
|
||||
float err2 = Ulp_Error_Half(q[j], correct2);
|
||||
float err3 = Ulp_Error_Half(q[j], correct3);
|
||||
fail = fail
|
||||
&& ((!(fabsf(err2) <= ulps))
|
||||
&& (!(fabsf(err3) <= ulps)));
|
||||
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(correct2, ulps)
|
||||
|| IsHalfResultSubnormal(correct3, ulps))
|
||||
{
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fabsf(err) > tinfo->maxError)
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
}
|
||||
if (fail)
|
||||
{
|
||||
vlog_error("\nERROR: %s%s: %f ulp error at %a "
|
||||
"(half 0x%04x)\nExpected: %a (half 0x%04x) "
|
||||
"\nActual: %a (half 0x%04x)\n",
|
||||
job->f->name, sizeNames[k], err, s[j], p[j],
|
||||
cl_half_to_float(r[j]), r[j], test, q[j]);
|
||||
error = -1;
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||
out[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||
j, error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||
|
||||
|
||||
if (0 == (base & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
|
||||
"ThreadCount:%2u\n",
|
||||
base, job->step, job->scale, buffer_elements, job->ulps,
|
||||
job->threadCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
TestInfoBase test_info_base;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
float maxError = 0.0f;
|
||||
double maxErrorVal = 0.0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
|
||||
// Init test_info
|
||||
memset(&test_info_base, 0, sizeof(test_info_base));
|
||||
TestInfo test_info(test_info_base);
|
||||
|
||||
test_info.threadCount = GetThreadCount();
|
||||
|
||||
test_info.subBufferSize = BUFFER_SIZE
|
||||
/ (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||
test_info.scale = getTestScale(sizeof(cl_half));
|
||||
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||
{
|
||||
// there was overflow
|
||||
test_info.jobCount = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
test_info.jobCount =
|
||||
std::max((cl_uint)1,
|
||||
(cl_uint)((1ULL << sizeof(cl_half) * 8) / test_info.step));
|
||||
}
|
||||
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->half_ulps;
|
||||
test_info.ftz =
|
||||
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
|
||||
test_info.tinfo.resize(test_info.threadCount);
|
||||
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
cl_buffer_region region = { i * test_info.subBufferSize
|
||||
* sizeof(cl_half),
|
||||
test_info.subBufferSize * sizeof(cl_half) };
|
||||
test_info.tinfo[i].inBuf =
|
||||
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||
"region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||
®ion, &error);
|
||||
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||
{
|
||||
vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
|
||||
"for region {%zd, %zd}\n",
|
||||
region.origin, region.size);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
test_info.tinfo[i].tQueue =
|
||||
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||
{
|
||||
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for special cases for unary float
|
||||
test_info.isRangeLimited = 0;
|
||||
test_info.half_sin_cos_tan_limit = 0;
|
||||
if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
|
||||
{
|
||||
test_info.isRangeLimited = 1;
|
||||
test_info.half_sin_cos_tan_limit = 1.0f
|
||||
+ test_info.ulps
|
||||
* (FLT_EPSILON / 2.0f); // out of range results from finite
|
||||
// inputs must be in [-1,1]
|
||||
}
|
||||
else if (0 == strcmp(f->name, "half_tan"))
|
||||
{
|
||||
test_info.isRangeLimited = 1;
|
||||
test_info.half_sin_cos_tan_limit =
|
||||
INFINITY; // out of range resut from finite inputs must be numeric
|
||||
}
|
||||
|
||||
// Init the kernels
|
||||
{
|
||||
BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode };
|
||||
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info);
|
||||
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for (i = 0; i < test_info.threadCount; i++)
|
||||
{
|
||||
if (test_info.tinfo[i].maxError > maxError)
|
||||
{
|
||||
maxError = test_info.tinfo[i].maxError;
|
||||
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||
}
|
||||
}
|
||||
|
||||
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting) vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
vlog("\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
452
test_conformance/math_brute_force/unary_two_results_half.cpp
Normal file
452
test_conformance/math_brute_force/unary_two_results_half.cpp
Normal file
@@ -0,0 +1,452 @@
|
||||
//
|
||||
// Copyright (c) 2023 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||
ParameterType::Half, ParameterType::Half,
|
||||
vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
Programs programs;
|
||||
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||
KernelMatrix kernels;
|
||||
float maxError0 = 0.0f;
|
||||
float maxError1 = 0.0f;
|
||||
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
float maxErrorVal0 = 0.0f;
|
||||
float maxErrorVal1 = 0.0f;
|
||||
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||
|
||||
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_half),
|
||||
size_t(1ULL << (sizeof(cl_half) * 8)));
|
||||
size_t bufferSize = bufferElements * sizeof(cl_half);
|
||||
|
||||
std::vector<cl_uchar> overflow(bufferElements);
|
||||
int isFract = 0 == strcmp("fract", f->nameInCode);
|
||||
int skipNanInf = isFract;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
|
||||
float half_ulps = f->half_ulps;
|
||||
|
||||
// Init the kernels
|
||||
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
|
||||
if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
return error;
|
||||
|
||||
for (uint64_t i = 0; i < (1ULL << 16); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_half *pIn = (cl_half *)gIn;
|
||||
for (size_t j = 0; j < bufferElements; j++) pIn[j] = (cl_ushort)i + j;
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Write garbage into output arrays
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(gOut[j], &pattern, bufferSize);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||
CL_FALSE, 0, bufferSize,
|
||||
gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||
error, j);
|
||||
return error;
|
||||
}
|
||||
|
||||
memset_pattern4(gOut2[j], &pattern, bufferSize);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
|
||||
CL_FALSE, 0, bufferSize,
|
||||
gOut2[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
|
||||
error, j);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||
sizeof(pattern), 0, bufferSize, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer 1 failed!\n");
|
||||
|
||||
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||
sizeof(pattern), 0, bufferSize, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer 2 failed!\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_half);
|
||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error =
|
||||
clSetKernelArg(kernels[j][thread_id], 1,
|
||||
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
|
||||
sizeof(gInBuffer), &gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||
1, NULL, &localCount, NULL, 0,
|
||||
NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(gQueue)))
|
||||
{
|
||||
vlog_error("clFlush failed\n");
|
||||
return error;
|
||||
}
|
||||
|
||||
FPU_mode_type oldMode;
|
||||
RoundingMode oldRoundMode = kRoundToNearestEven;
|
||||
if (isFract)
|
||||
{
|
||||
// Calculate the correctly rounded reference result
|
||||
memset(&oldMode, 0, sizeof(oldMode));
|
||||
if (ftz) ForceFTZ(&oldMode);
|
||||
|
||||
// Set the rounding mode to match the device
|
||||
if (gIsInRTZMode)
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
}
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
cl_half *ref1 = (cl_half *)gOut_Ref;
|
||||
cl_half *ref2 = (cl_half *)gOut_Ref2;
|
||||
|
||||
if (skipNanInf)
|
||||
{
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
{
|
||||
double dd;
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
|
||||
ref1[j] = HFF((float)f->func.f_fpf(HTF(pIn[j]), &dd));
|
||||
ref2[j] = HFF((float)dd);
|
||||
|
||||
// ensure correct rounding of fract result is not reaching 1
|
||||
if (isFract && HTF(ref1[j]) >= 1.f) ref1[j] = 0x3bff;
|
||||
|
||||
overflow[j] =
|
||||
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
{
|
||||
double dd;
|
||||
ref1[j] = HFF((float)f->func.f_fpf(HTF(pIn[j]), &dd));
|
||||
ref2[j] = HFF((float)dd);
|
||||
}
|
||||
}
|
||||
|
||||
if (isFract && ftz) RestoreFPState(&oldMode);
|
||||
|
||||
// Read the data back
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
bufferSize, gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("ReadArray failed %d\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
|
||||
bufferSize, gOut2[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("ReadArray2 failed %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if (gSkipCorrectnessTesting)
|
||||
{
|
||||
if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
||||
break;
|
||||
}
|
||||
|
||||
// Verify data
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
{
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_half *test1 = (cl_half *)gOut[k];
|
||||
cl_half *test2 = (cl_half *)gOut2[k];
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (ref1[j] != test1[j] || ref2[j] != test2[j])
|
||||
{
|
||||
double fp_correct1 = 0, fp_correct2 = 0;
|
||||
float err = 0, err2 = 0;
|
||||
|
||||
fp_correct1 = f->func.f_fpf(HTF(pIn[j]), &fp_correct2);
|
||||
|
||||
cl_half correct1 = HFF(fp_correct1);
|
||||
cl_half correct2 = HFF(fp_correct2);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input
|
||||
// or output is a infinity or NaN or overflow
|
||||
if (skipNanInf)
|
||||
{
|
||||
if (skipNanInf && overflow[j]) continue;
|
||||
// Note: no double rounding here. Reference functions
|
||||
// calculate in single precision.
|
||||
if (IsHalfInfinity(correct1) || IsHalfNaN(correct1)
|
||||
|| IsHalfInfinity(correct2) || IsHalfNaN(correct2)
|
||||
|| IsHalfInfinity(pIn[j]) || IsHalfNaN(pIn[j]))
|
||||
continue;
|
||||
}
|
||||
|
||||
err = Ulp_Error_Half(test1[j], fp_correct1);
|
||||
err2 = Ulp_Error_Half(test2[j], fp_correct2);
|
||||
|
||||
int fail =
|
||||
!(fabsf(err) <= half_ulps && fabsf(err2) <= half_ulps);
|
||||
|
||||
if (ftz)
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
if (IsHalfResultSubnormal(fp_correct1, half_ulps))
|
||||
{
|
||||
if (IsHalfResultSubnormal(fp_correct2, half_ulps))
|
||||
{
|
||||
fail = fail
|
||||
&& !(HTF(test1[j]) == 0.0f
|
||||
&& HTF(test2[j]) == 0.0f);
|
||||
if (!fail)
|
||||
{
|
||||
err = 0.0f;
|
||||
err2 = 0.0f;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fail = fail
|
||||
&& !(HTF(test1[j]) == 0.0f
|
||||
&& fabsf(err2) <= half_ulps);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
else if (IsHalfResultSubnormal(fp_correct2, half_ulps))
|
||||
{
|
||||
fail = fail
|
||||
&& !(HTF(test2[j]) == 0.0f
|
||||
&& fabsf(err) <= half_ulps);
|
||||
if (!fail) err2 = 0.0f;
|
||||
}
|
||||
|
||||
|
||||
// retry per section 6.5.3.3
|
||||
if (IsHalfSubnormal(pIn[j]))
|
||||
{
|
||||
double fp_correctp, fp_correctn;
|
||||
double fp_correct2p, fp_correct2n;
|
||||
float errp, err2p, errn, err2n;
|
||||
|
||||
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||
fp_correctp = f->func.f_fpf(0.0, &fp_correct2p);
|
||||
fp_correctn = f->func.f_fpf(-0.0, &fp_correct2n);
|
||||
|
||||
cl_half correctp = HFF(fp_correctp);
|
||||
cl_half correctn = HFF(fp_correctn);
|
||||
cl_half correct2p = HFF(fp_correct2p);
|
||||
cl_half correct2n = HFF(fp_correct2n);
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if
|
||||
// an input or output is a infinity or NaN or
|
||||
// overflow
|
||||
if (skipNanInf)
|
||||
{
|
||||
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||
|
||||
// Note: no double rounding here. Reference
|
||||
// functions calculate in single precision.
|
||||
if (IsHalfInfinity(correctp)
|
||||
|| IsHalfNaN(correctp)
|
||||
|| IsHalfInfinity(correctn)
|
||||
|| IsHalfNaN(correctn)
|
||||
|| IsHalfInfinity(correct2p)
|
||||
|| IsHalfNaN(correct2p)
|
||||
|| IsHalfInfinity(correct2n)
|
||||
|| IsHalfNaN(correct2n))
|
||||
continue;
|
||||
}
|
||||
|
||||
errp = Ulp_Error_Half(test1[j], fp_correctp);
|
||||
err2p = Ulp_Error_Half(test1[j], fp_correct2p);
|
||||
errn = Ulp_Error_Half(test1[j], fp_correctn);
|
||||
err2n = Ulp_Error_Half(test1[j], fp_correct2n);
|
||||
|
||||
fail = fail
|
||||
&& ((!(fabsf(errp) <= half_ulps))
|
||||
&& (!(fabsf(err2p) <= half_ulps))
|
||||
&& ((!(fabsf(errn) <= half_ulps))
|
||||
&& (!(fabsf(err2n) <= half_ulps))));
|
||||
if (fabsf(errp) < fabsf(err)) err = errp;
|
||||
if (fabsf(errn) < fabsf(err)) err = errn;
|
||||
if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
|
||||
if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (IsHalfResultSubnormal(fp_correctp, half_ulps)
|
||||
|| IsHalfResultSubnormal(fp_correctn,
|
||||
half_ulps))
|
||||
{
|
||||
if (IsHalfResultSubnormal(fp_correct2p,
|
||||
half_ulps)
|
||||
|| IsHalfResultSubnormal(fp_correct2n,
|
||||
half_ulps))
|
||||
{
|
||||
fail = fail
|
||||
&& !(HTF(test1[j]) == 0.0f
|
||||
&& HTF(test2[j]) == 0.0f);
|
||||
if (!fail) err = err2 = 0.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
fail = fail
|
||||
&& !(HTF(test1[j]) == 0.0f
|
||||
&& fabsf(err2) <= half_ulps);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
else if (IsHalfResultSubnormal(fp_correct2p,
|
||||
half_ulps)
|
||||
|| IsHalfResultSubnormal(fp_correct2n,
|
||||
half_ulps))
|
||||
{
|
||||
fail = fail
|
||||
&& !(HTF(test2[j]) == 0.0f
|
||||
&& (fabsf(err) <= half_ulps));
|
||||
if (!fail) err2 = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fabsf(err) > maxError0)
|
||||
{
|
||||
maxError0 = fabsf(err);
|
||||
maxErrorVal0 = HTF(pIn[j]);
|
||||
}
|
||||
if (fabsf(err2) > maxError1)
|
||||
{
|
||||
maxError1 = fabsf(err2);
|
||||
maxErrorVal1 = HTF(pIn[j]);
|
||||
}
|
||||
if (fail)
|
||||
{
|
||||
vlog_error("\nERROR: %s%s: {%f, %f} ulp error at %a: "
|
||||
"*{%a, %a} vs. {%a, %a}\n",
|
||||
f->name, sizeNames[k], err, err2,
|
||||
HTF(pIn[j]), HTF(ref1[j]), HTF(ref2[j]),
|
||||
HTF(test1[j]), HTF(test2[j]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
||||
|
||||
if (0 == (i & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14" PRIu64 " step:%10" PRIu64
|
||||
" bufferSize:%10zu \n",
|
||||
i, step, bufferSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
|
||||
maxErrorVal1);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
347
test_conformance/math_brute_force/unary_two_results_i_half.cpp
Normal file
347
test_conformance/math_brute_force/unary_two_results_i_half.cpp
Normal file
@@ -0,0 +1,347 @@
|
||||
//
|
||||
// Copyright (c) 2023 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
|
||||
#include <cinttypes>
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
|
||||
namespace {
|
||||
|
||||
cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||
ParameterType::Int, ParameterType::Half,
|
||||
vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
cl_ulong abs_cl_long(cl_long i)
|
||||
{
|
||||
cl_long mask = i >> 63;
|
||||
return (i ^ mask) - mask;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
Programs programs;
|
||||
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||
KernelMatrix kernels;
|
||||
float maxError = 0.0f;
|
||||
int64_t maxError2 = 0;
|
||||
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
float maxErrorVal = 0.0f;
|
||||
float maxErrorVal2 = 0.0f;
|
||||
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||
|
||||
// sizeof(cl_half) < sizeof (int32_t)
|
||||
// to prevent overflowing gOut_Ref2 it is necessary to use
|
||||
// bigger type as denominator for buffer size calculation
|
||||
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_int),
|
||||
size_t(1ULL << (sizeof(cl_half) * 8)));
|
||||
|
||||
size_t bufferSizeLo = bufferElements * sizeof(cl_half);
|
||||
size_t bufferSizeHi = bufferElements * sizeof(cl_int);
|
||||
|
||||
cl_ulong maxiError = 0;
|
||||
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
|
||||
float half_ulps = f->half_ulps;
|
||||
|
||||
maxiError = half_ulps == INFINITY ? CL_ULONG_MAX : 0;
|
||||
|
||||
// Init the kernels
|
||||
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
|
||||
if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
return error;
|
||||
|
||||
for (uint64_t i = 0; i < (1ULL << 16); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_half *pIn = (cl_half *)gIn;
|
||||
for (size_t j = 0; j < bufferElements; j++) pIn[j] = (cl_ushort)i + j;
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSizeLo, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Write garbage into output arrays
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(gOut[j], &pattern, bufferSizeLo);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||
CL_FALSE, 0, bufferSizeLo,
|
||||
gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||
error, j);
|
||||
return error;
|
||||
}
|
||||
|
||||
memset_pattern4(gOut2[j], &pattern, bufferSizeHi);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
|
||||
CL_FALSE, 0, bufferSizeHi,
|
||||
gOut2[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
|
||||
error, j);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||
sizeof(pattern), 0, bufferSizeLo, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer 1 failed!\n");
|
||||
|
||||
error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j], &pattern,
|
||||
sizeof(pattern), 0, bufferSizeHi, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer 2 failed!\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
// align working group size with the bigger output type
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_int);
|
||||
size_t localCount = (bufferSizeHi + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error =
|
||||
clSetKernelArg(kernels[j][thread_id], 1,
|
||||
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
|
||||
sizeof(gInBuffer), &gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||
1, NULL, &localCount, NULL, 0,
|
||||
NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(gQueue)))
|
||||
{
|
||||
vlog_error("clFlush failed\n");
|
||||
return error;
|
||||
}
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
cl_half *ref1 = (cl_half *)gOut_Ref;
|
||||
int32_t *ref2 = (int32_t *)gOut_Ref2;
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
ref1[j] = HFF((float)f->func.f_fpI(HTF(pIn[j]), ref2 + j));
|
||||
|
||||
// Read the data back
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
cl_bool blocking =
|
||||
(j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], blocking, 0,
|
||||
bufferSizeLo, gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("ReadArray failed %d\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], blocking,
|
||||
0, bufferSizeHi, gOut2[j], 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("ReadArray2 failed %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if (gSkipCorrectnessTesting) break;
|
||||
|
||||
// Verify data
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
{
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_half *test1 = (cl_half *)(gOut[k]);
|
||||
int32_t *test2 = (int32_t *)(gOut2[k]);
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (ref1[j] != test1[j] || ref2[j] != test2[j])
|
||||
{
|
||||
cl_half test = ((cl_half *)test1)[j];
|
||||
int correct2 = INT_MIN;
|
||||
float fp_correct =
|
||||
(float)f->func.f_fpI(HTF(pIn[j]), &correct2);
|
||||
cl_half correct = HFF(fp_correct);
|
||||
float err = correct != test
|
||||
? Ulp_Error_Half(test, fp_correct)
|
||||
: 0.f;
|
||||
cl_long iErr = (int64_t)test2[j] - (int64_t)correct2;
|
||||
int fail = !(fabsf(err) <= half_ulps
|
||||
&& abs_cl_long(iErr) <= maxiError);
|
||||
if (ftz)
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
if (IsHalfResultSubnormal(fp_correct, half_ulps))
|
||||
{
|
||||
fail = fail && !(test == 0.0f && iErr == 0);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
|
||||
// retry per section 6.5.3.3
|
||||
if (IsHalfSubnormal(pIn[j]))
|
||||
{
|
||||
int correct5, correct6;
|
||||
double fp_correct3 = f->func.f_fpI(0.0, &correct5);
|
||||
double fp_correct4 = f->func.f_fpI(-0.0, &correct6);
|
||||
|
||||
float err2 = Ulp_Error_Half(test, fp_correct3);
|
||||
float err3 = Ulp_Error_Half(test, fp_correct4);
|
||||
|
||||
cl_long iErr2 =
|
||||
(long long)test2[j] - (long long)correct5;
|
||||
cl_long iErr3 =
|
||||
(long long)test2[j] - (long long)correct6;
|
||||
|
||||
// Did +0 work?
|
||||
if (fabsf(err2) <= half_ulps
|
||||
&& abs_cl_long(iErr2) <= maxiError)
|
||||
{
|
||||
err = err2;
|
||||
iErr = iErr2;
|
||||
fail = 0;
|
||||
}
|
||||
// Did -0 work?
|
||||
else if (fabsf(err3) <= half_ulps
|
||||
&& abs_cl_long(iErr3) <= maxiError)
|
||||
{
|
||||
err = err3;
|
||||
iErr = iErr3;
|
||||
fail = 0;
|
||||
}
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if (fail
|
||||
&& (IsHalfResultSubnormal(correct2, half_ulps)
|
||||
|| IsHalfResultSubnormal(fp_correct3,
|
||||
half_ulps)))
|
||||
{
|
||||
fail = fail
|
||||
&& !(test == 0.0f
|
||||
&& (abs_cl_long(iErr2) <= maxiError
|
||||
|| abs_cl_long(iErr3)
|
||||
<= maxiError));
|
||||
if (!fail)
|
||||
{
|
||||
err = 0.0f;
|
||||
iErr = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fabsf(err) > maxError)
|
||||
{
|
||||
maxError = fabsf(err);
|
||||
maxErrorVal = pIn[j];
|
||||
}
|
||||
if (llabs(iErr) > maxError2)
|
||||
{
|
||||
maxError2 = llabs(iErr);
|
||||
maxErrorVal2 = pIn[j];
|
||||
}
|
||||
|
||||
if (fail)
|
||||
{
|
||||
vlog_error("\nERROR: %s%s: {%f, %d} ulp error at %a: "
|
||||
"*{%a, %d} vs. {%a, %d}\n",
|
||||
f->name, sizeNames[k], err, (int)iErr,
|
||||
HTF(pIn[j]), HTF(ref1[j]),
|
||||
((int *)gOut_Ref2)[j], HTF(test), test2[j]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (0 == (i & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14" PRIu64 " step:%10" PRIu64
|
||||
" bufferSize:%10zu \n",
|
||||
i, step, bufferSizeHi);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
|
||||
maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
239
test_conformance/math_brute_force/unary_u_half.cpp
Normal file
239
test_conformance/math_brute_force/unary_u_half.cpp
Normal file
@@ -0,0 +1,239 @@
|
||||
//
|
||||
// Copyright (c) 2023 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
#include "function_list.h"
|
||||
#include "test_functions.h"
|
||||
#include "utility.h"
|
||||
#include "reference_math.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <cinttypes>
|
||||
|
||||
namespace {
|
||||
|
||||
static cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED,
|
||||
void *p)
|
||||
{
|
||||
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||
cl_uint vector_size_index) {
|
||||
return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||
ParameterType::UShort, vector_size_index);
|
||||
};
|
||||
return BuildKernels(info, job_id, generator);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int TestFunc_Half_UShort(const Func *f, MTdata d, bool relaxedMode)
|
||||
{
|
||||
int error;
|
||||
Programs programs;
|
||||
KernelMatrix kernels;
|
||||
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||
float maxError = 0.0f;
|
||||
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||
float maxErrorVal = 0.0f;
|
||||
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_half),
|
||||
size_t(1ULL << (sizeof(cl_half) * 8)));
|
||||
size_t bufferSize = bufferElements * sizeof(cl_half);
|
||||
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||
const char *name = f->name;
|
||||
float half_ulps = f->half_ulps;
|
||||
|
||||
// Init the kernels
|
||||
BuildKernelInfo build_info = { 1, kernels, programs, f->nameInCode };
|
||||
if ((error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
{
|
||||
return error;
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||
{
|
||||
// Init input array
|
||||
cl_ushort *p = (cl_ushort *)gIn;
|
||||
for (size_t j = 0; j < bufferElements; j++) p[j] = (uint16_t)i + j;
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// write garbage into output arrays
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
uint32_t pattern = 0xacdcacdc;
|
||||
if (gHostFill)
|
||||
{
|
||||
memset_pattern4(gOut[j], &pattern, bufferSize);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||
CL_FALSE, 0, bufferSize,
|
||||
gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error(
|
||||
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||
error, j);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||
sizeof(pattern), 0, bufferSize, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_half);
|
||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
|
||||
sizeof(gInBuffer), &gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||
1, NULL, &localCount, NULL, 0,
|
||||
NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
||||
|
||||
// Calculate the correctly rounded reference result
|
||||
cl_half *r = (cl_half *)gOut_Ref;
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
{
|
||||
if (!strcmp(name, "nan"))
|
||||
r[j] = reference_nanh(p[j]);
|
||||
else
|
||||
r[j] = HFF(f->func.f_u(p[j]));
|
||||
}
|
||||
// Read the data back
|
||||
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
if ((error =
|
||||
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||
bufferSize, gOut[j], 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("ReadArray failed %d\n", error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
if (gSkipCorrectnessTesting) break;
|
||||
|
||||
// Verify data
|
||||
cl_ushort *t = (cl_ushort *)gOut_Ref;
|
||||
for (size_t j = 0; j < bufferElements; j++)
|
||||
{
|
||||
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||
{
|
||||
cl_ushort *q = (cl_ushort *)(gOut[k]);
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if (t[j] != q[j])
|
||||
{
|
||||
double test = cl_half_to_float(q[j]);
|
||||
double correct;
|
||||
if (!strcmp(name, "nan"))
|
||||
correct = cl_half_to_float(reference_nanh(p[j]));
|
||||
else
|
||||
correct = f->func.f_u(p[j]);
|
||||
|
||||
float err = Ulp_Error_Half(q[j], correct);
|
||||
int fail = !(fabsf(err) <= half_ulps);
|
||||
|
||||
if (fail)
|
||||
{
|
||||
if (ftz)
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
if (IsHalfResultSubnormal(correct, half_ulps))
|
||||
{
|
||||
fail = fail && (test != 0.0f);
|
||||
if (!fail) err = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fabsf(err) > maxError)
|
||||
{
|
||||
maxError = fabsf(err);
|
||||
maxErrorVal = p[j];
|
||||
}
|
||||
if (fail)
|
||||
{
|
||||
vlog_error(
|
||||
"\n%s%s: %f ulp error at 0x%04x \nExpected: %a "
|
||||
"(0x%04x) \nActual: %a (0x%04x)\n",
|
||||
f->name, sizeNames[k], err, p[j],
|
||||
cl_half_to_float(r[j]), r[j], test, q[j]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (0 == (i & 0x0fffffff))
|
||||
{
|
||||
if (gVerboseBruteForce)
|
||||
{
|
||||
vlog("base:%14" PRIu64 " step:%10" PRIu64
|
||||
" bufferSize:%10zd \n",
|
||||
i, step, bufferSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(".");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
{
|
||||
if (gWimpyMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting) vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
vlog("\n");
|
||||
|
||||
return error;
|
||||
}
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/ThreadPool.h"
|
||||
#include "harness/conversions.h"
|
||||
#include "CL/cl_half.h"
|
||||
|
||||
#define BUFFER_SIZE (1024 * 1024 * 2)
|
||||
#define EMBEDDED_REDUCTION_FACTOR (64)
|
||||
@@ -61,10 +62,20 @@ extern int gFastRelaxedDerived;
|
||||
extern int gWimpyMode;
|
||||
extern int gHostFill;
|
||||
extern int gIsInRTZMode;
|
||||
extern int gHasHalf;
|
||||
extern int gInfNanSupport;
|
||||
extern int gIsEmbedded;
|
||||
extern int gVerboseBruteForce;
|
||||
extern uint32_t gMaxVectorSizeIndex;
|
||||
extern uint32_t gMinVectorSizeIndex;
|
||||
extern cl_device_fp_config gFloatCapabilities;
|
||||
extern cl_device_fp_config gHalfCapabilities;
|
||||
extern RoundingMode gFloatToHalfRoundingMode;
|
||||
|
||||
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||
|
||||
#define HFF(num) cl_half_from_float(num, gHalfRoundingMode)
|
||||
#define HTF(num) cl_half_to_float(num)
|
||||
|
||||
#define LOWER_IS_BETTER 0
|
||||
#define HIGHER_IS_BETTER 1
|
||||
@@ -115,6 +126,12 @@ inline int IsFloatResultSubnormal(double x, float ulps)
|
||||
return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
|
||||
}
|
||||
|
||||
inline int IsHalfResultSubnormal(float x, float ulps)
|
||||
{
|
||||
x = fabs(x) - MAKE_HEX_FLOAT(0x1.0p-24, 0x1, -24) * ulps;
|
||||
return x < MAKE_HEX_FLOAT(0x1.0p-14, 0x1, -14);
|
||||
}
|
||||
|
||||
inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
|
||||
{
|
||||
x = x - abs_err;
|
||||
@@ -157,6 +174,26 @@ inline int IsFloatNaN(double x)
|
||||
return ((u.u & 0x7fffffffU) > 0x7F800000U);
|
||||
}
|
||||
|
||||
inline bool IsHalfNaN(const cl_half v)
|
||||
{
|
||||
// Extract FP16 exponent and mantissa
|
||||
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
||||
|
||||
// NaN test
|
||||
return (h_exp == 0x1F && h_mant != 0);
|
||||
}
|
||||
|
||||
inline bool IsHalfInfinity(const cl_half v)
|
||||
{
|
||||
// Extract FP16 exponent and mantissa
|
||||
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
||||
|
||||
// Inf test
|
||||
return (h_exp == 0x1F && h_mant == 0);
|
||||
}
|
||||
|
||||
cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
|
||||
|
||||
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
||||
|
||||
Reference in New Issue
Block a user