mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Add fp16 testing to conversions and bruteforce (#1975)
Merge the `fp16-staging` branch into `main`, adding fp16 (`half`) testing to the conversions and math bruteforce tests. --------- Signed-off-by: Ahmed Hesham <ahmed.hesham@arm.com> Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com> Signed-off-by: Guo, Yilong <yilong.guo@intel.com> Signed-off-by: John Kesapides <john.kesapides@arm.com> Co-authored-by: Marcin Hajder <marcin.hajder@gmail.com> Co-authored-by: Ewan Crawford <ewan@codeplay.com> Co-authored-by: Wawiorko, Grzegorz <grzegorz.wawiorko@intel.com> Co-authored-by: Sreelakshmi Haridas Maruthur <sharidas@quicinc.com> Co-authored-by: Harald van Dijk <harald@gigawatt.nl> Co-authored-by: Ben Ashbaugh <ben.ashbaugh@intel.com> Co-authored-by: Haonan Yang <haonan.yang@intel.com> Co-authored-by: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Co-authored-by: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Co-authored-by: Wenwan Xing <wenwan.xing@intel.com> Co-authored-by: Yilong Guo <yilong.guo@intel.com> Co-authored-by: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Co-authored-by: joshqti <127994991+joshqti@users.noreply.github.com> Co-authored-by: Pekka Jääskeläinen <pekka.jaaskelainen@tuni.fi> Co-authored-by: imilenkovic00 <155085410+imilenkovic00@users.noreply.github.com> Co-authored-by: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Co-authored-by: Aharon Abramson <aharon.abramson@mobileye.com>
This commit is contained in:
committed by
GitHub
parent
b3c89ebde0
commit
b6941b6c61
@@ -371,7 +371,7 @@ static float Ulp_Error_Half_Float(float test, double reference)
|
|||||||
return (float)scalbn(testVal - reference, ulp_exp);
|
return (float)scalbn(testVal - reference, ulp_exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
float Ulp_Error_Half(cl_half test, float reference)
|
float Ulp_Error_Half(cl_half test, double reference)
|
||||||
{
|
{
|
||||||
return Ulp_Error_Half_Float(cl_half_to_float(test), reference);
|
return Ulp_Error_Half_Float(cl_half_to_float(test), reference);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -185,7 +185,7 @@ static int vlog_win32(const char *format, ...);
|
|||||||
|
|
||||||
extern const char *IGetErrorString(int clErrorCode);
|
extern const char *IGetErrorString(int clErrorCode);
|
||||||
|
|
||||||
extern float Ulp_Error_Half(cl_half test, float reference);
|
extern float Ulp_Error_Half(cl_half test, double reference);
|
||||||
extern float Ulp_Error(float test, double reference);
|
extern float Ulp_Error(float test, double reference);
|
||||||
extern float Ulp_Error_Double(double test, long double reference);
|
extern float Ulp_Error_Double(double test, long double reference);
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -42,10 +42,11 @@ typedef enum
|
|||||||
kshort = 3,
|
kshort = 3,
|
||||||
kuint = 4,
|
kuint = 4,
|
||||||
kint = 5,
|
kint = 5,
|
||||||
kfloat = 6,
|
khalf = 6,
|
||||||
kdouble = 7,
|
kfloat = 7,
|
||||||
kulong = 8,
|
kdouble = 8,
|
||||||
klong = 9,
|
kulong = 9,
|
||||||
|
klong = 10,
|
||||||
|
|
||||||
// This goes last
|
// This goes last
|
||||||
kTypeCount
|
kTypeCount
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -48,6 +48,7 @@
|
|||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
#include "basic_test_conversions.h"
|
#include "basic_test_conversions.h"
|
||||||
|
|
||||||
@@ -86,9 +87,13 @@ int gWimpyReductionFactor = 128;
|
|||||||
int gSkipTesting = 0;
|
int gSkipTesting = 0;
|
||||||
int gForceFTZ = 0;
|
int gForceFTZ = 0;
|
||||||
int gIsRTZ = 0;
|
int gIsRTZ = 0;
|
||||||
|
int gForceHalfFTZ = 0;
|
||||||
|
int gIsHalfRTZ = 0;
|
||||||
uint32_t gSimdSize = 1;
|
uint32_t gSimdSize = 1;
|
||||||
int gHasDouble = 0;
|
int gHasDouble = 0;
|
||||||
int gTestDouble = 1;
|
int gTestDouble = 1;
|
||||||
|
int gHasHalfs = 0;
|
||||||
|
int gTestHalfs = 1;
|
||||||
const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
|
const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
|
||||||
int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
|
int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
|
||||||
int gMinVectorSize = 0;
|
int gMinVectorSize = 0;
|
||||||
@@ -100,6 +105,8 @@ int argCount = 0;
|
|||||||
|
|
||||||
double SubtractTime(uint64_t endTime, uint64_t startTime);
|
double SubtractTime(uint64_t endTime, uint64_t startTime);
|
||||||
|
|
||||||
|
cl_half_rounding_mode DataInitInfo::halfRoundingMode = CL_HALF_RTE;
|
||||||
|
cl_half_rounding_mode ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE;
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
// for readability sake keep this section unformatted
|
// for readability sake keep this section unformatted
|
||||||
@@ -256,8 +263,30 @@ std::vector<double> DataInitInfo::specialValuesDouble = {
|
|||||||
MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30),
|
MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30),
|
||||||
MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
|
MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
|
||||||
};
|
};
|
||||||
// clang-format on
|
|
||||||
|
|
||||||
|
// A table of more difficult cases to get right
|
||||||
|
std::vector<cl_half> DataInitInfo::specialValuesHalf = {
|
||||||
|
0xffff,
|
||||||
|
0x0000,
|
||||||
|
0x0001,
|
||||||
|
0x7c00, /*INFINITY*/
|
||||||
|
0xfc00, /*-INFINITY*/
|
||||||
|
0x8000, /*-0*/
|
||||||
|
0x7bff, /*HALF_MAX*/
|
||||||
|
0x0400, /*HALF_MIN*/
|
||||||
|
0x03ff, /* Largest denormal */
|
||||||
|
0x3c00, /* 1 */
|
||||||
|
0xbc00, /* -1 */
|
||||||
|
0x3555, /*nearest value to 1/3*/
|
||||||
|
0x3bff, /*largest number less than one*/
|
||||||
|
0xc000, /* -2 */
|
||||||
|
0xfbff, /* -HALF_MAX */
|
||||||
|
0x8400, /* -HALF_MIN */
|
||||||
|
0x4248, /* M_PI_H */
|
||||||
|
0xc248, /* -M_PI_H */
|
||||||
|
0xbbff, /* Largest negative fraction */
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
||||||
// (that's x87 default state). This causes problems with the tests that
|
// (that's x87 default state). This causes problems with the tests that
|
||||||
@@ -282,15 +311,32 @@ static inline void Force64BitFPUPrecision(void)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
template <typename InType, typename OutType>
|
int CalcRefValsPat<InType, OutType, InFP, OutFP>::check_result(void *test,
|
||||||
int CalcRefValsPat<InType, OutType>::check_result(void *test, uint32_t count,
|
uint32_t count,
|
||||||
int vectorSize)
|
int vectorSize)
|
||||||
{
|
{
|
||||||
const cl_uchar *a = (const cl_uchar *)gAllowZ;
|
const cl_uchar *a = (const cl_uchar *)gAllowZ;
|
||||||
|
|
||||||
if (std::is_integral<OutType>::value)
|
if (is_half<OutType, OutFP>())
|
||||||
{ // char/uchar/short/ushort/int/uint/long/ulong
|
{
|
||||||
|
const cl_half *t = (const cl_half *)test;
|
||||||
|
const cl_half *c = (const cl_half *)gRef;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < count; i++)
|
||||||
|
if (t[i] != c[i] &&
|
||||||
|
// Allow nan's to be binary different
|
||||||
|
!((t[i] & 0x7fff) > 0x7C00 && (c[i] & 0x7fff) > 0x7C00)
|
||||||
|
&& !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x8000)))
|
||||||
|
{
|
||||||
|
vlog(
|
||||||
|
"\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
|
||||||
|
vectorSize, i, HTF(c[i]), HTF(t[i]));
|
||||||
|
return i + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (std::is_integral<OutType>::value)
|
||||||
|
{ // char/uchar/short/ushort/half/int/uint/long/ulong
|
||||||
const OutType *t = (const OutType *)test;
|
const OutType *t = (const OutType *)test;
|
||||||
const OutType *c = (const OutType *)gRef;
|
const OutType *c = (const OutType *)gRef;
|
||||||
for (uint32_t i = 0; i < count; i++)
|
for (uint32_t i = 0; i < count; i++)
|
||||||
@@ -388,6 +434,20 @@ cl_int CustomConversionsTest::Run()
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// skip half if we don't have it
|
||||||
|
if (!gTestHalfs && (inType == khalf || outType == khalf))
|
||||||
|
{
|
||||||
|
if (gHasHalfs)
|
||||||
|
{
|
||||||
|
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
|
||||||
|
gTypeNames[outType], gSaturationNames[sat],
|
||||||
|
gRoundingModeNames[round], gTypeNames[inType]);
|
||||||
|
vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
|
||||||
|
"off.\n");
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// skip longs on embedded
|
// skip longs on embedded
|
||||||
if (!gHasLong
|
if (!gHasLong
|
||||||
&& (inType == klong || outType == klong || inType == kulong
|
&& (inType == klong || outType == klong || inType == kulong
|
||||||
@@ -427,8 +487,8 @@ ConversionsTest::ConversionsTest(cl_device_id device, cl_context context,
|
|||||||
cl_command_queue queue)
|
cl_command_queue queue)
|
||||||
: context(context), device(device), queue(queue), num_elements(0),
|
: context(context), device(device), queue(queue), num_elements(0),
|
||||||
typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0),
|
typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0),
|
||||||
cl_uint(0), cl_int(0), cl_float(0), cl_double(0),
|
cl_uint(0), cl_int(0), cl_half(0), cl_float(0),
|
||||||
cl_ulong(0), cl_long(0) })
|
cl_double(0), cl_ulong(0), cl_long(0) })
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
|
||||||
@@ -445,11 +505,31 @@ cl_int ConversionsTest::Run()
|
|||||||
cl_int ConversionsTest::SetUp(int elements)
|
cl_int ConversionsTest::SetUp(int elements)
|
||||||
{
|
{
|
||||||
num_elements = elements;
|
num_elements = elements;
|
||||||
|
if (is_extension_available(device, "cl_khr_fp16"))
|
||||||
|
{
|
||||||
|
const cl_device_fp_config fpConfigHalf =
|
||||||
|
get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);
|
||||||
|
if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0)
|
||||||
|
{
|
||||||
|
DataInitInfo::halfRoundingMode = CL_HALF_RTE;
|
||||||
|
ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE;
|
||||||
|
}
|
||||||
|
else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0)
|
||||||
|
{
|
||||||
|
DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
|
||||||
|
ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTZ;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_error("Error while acquiring half rounding mode");
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
template <typename InType, typename OutType>
|
|
||||||
void ConversionsTest::TestTypesConversion(const Type &inType,
|
void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||||
const Type &outType, int &testNumber,
|
const Type &outType, int &testNumber,
|
||||||
int startMinVectorSize)
|
int startMinVectorSize)
|
||||||
@@ -470,7 +550,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
|||||||
sat = (SaturationMode)(sat + 1))
|
sat = (SaturationMode)(sat + 1))
|
||||||
{
|
{
|
||||||
// skip illegal saturated conversions to float type
|
// skip illegal saturated conversions to float type
|
||||||
if (kSaturated == sat && (outType == kfloat || outType == kdouble))
|
if (kSaturated == sat
|
||||||
|
&& (outType == kfloat || outType == kdouble || outType == khalf))
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -507,6 +588,20 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// skip half if we don't have it
|
||||||
|
if (!gTestHalfs && (inType == khalf || outType == khalf))
|
||||||
|
{
|
||||||
|
if (gHasHalfs)
|
||||||
|
{
|
||||||
|
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
|
||||||
|
gTypeNames[outType], gSaturationNames[sat],
|
||||||
|
gRoundingModeNames[round], gTypeNames[inType]);
|
||||||
|
vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
|
||||||
|
"off.\n");
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Skip the implicit converts if the rounding mode is
|
// Skip the implicit converts if the rounding mode is
|
||||||
// not default or test is saturated
|
// not default or test is saturated
|
||||||
if (0 == startMinVectorSize)
|
if (0 == startMinVectorSize)
|
||||||
@@ -517,7 +612,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
|||||||
gMinVectorSize = 0;
|
gMinVectorSize = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((error = DoTest<InType, OutType>(outType, inType, sat, round)))
|
if ((error = DoTest<InType, OutType, InFP, OutFP>(outType, inType,
|
||||||
|
sat, round)))
|
||||||
{
|
{
|
||||||
vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
|
vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
|
||||||
"FAILED ** \n",
|
"FAILED ** \n",
|
||||||
@@ -529,8 +625,7 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
template <typename InType, typename OutType>
|
|
||||||
int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||||
RoundingMode round)
|
RoundingMode round)
|
||||||
{
|
{
|
||||||
@@ -541,7 +636,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
|||||||
cl_uint threads = GetThreadCount();
|
cl_uint threads = GetThreadCount();
|
||||||
|
|
||||||
DataInitInfo info = { 0, 0, outType, inType, sat, round, threads };
|
DataInitInfo info = { 0, 0, outType, inType, sat, round, threads };
|
||||||
DataInfoSpec<InType, OutType> init_info(info);
|
DataInfoSpec<InType, OutType, InFP, OutFP> init_info(info);
|
||||||
WriteInputBufferInfo writeInputBufferInfo;
|
WriteInputBufferInfo writeInputBufferInfo;
|
||||||
int vectorSize;
|
int vectorSize;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
@@ -564,7 +659,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
|||||||
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
|
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
|
||||||
{
|
{
|
||||||
writeInputBufferInfo.calcInfo[vectorSize].reset(
|
writeInputBufferInfo.calcInfo[vectorSize].reset(
|
||||||
new CalcRefValsPat<InType, OutType>());
|
new CalcRefValsPat<InType, OutType, InFP, OutFP>());
|
||||||
writeInputBufferInfo.calcInfo[vectorSize]->program =
|
writeInputBufferInfo.calcInfo[vectorSize]->program =
|
||||||
conv_test::MakeProgram(
|
conv_test::MakeProgram(
|
||||||
outType, inType, sat, round, vectorSize,
|
outType, inType, sat, round, vectorSize,
|
||||||
@@ -597,6 +692,11 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
|||||||
if (round == kDefaultRoundingMode && gIsRTZ)
|
if (round == kDefaultRoundingMode && gIsRTZ)
|
||||||
init_info.round = round = kRoundTowardZero;
|
init_info.round = round = kRoundTowardZero;
|
||||||
}
|
}
|
||||||
|
else if (std::is_same<OutType, cl_half>::value && OutFP)
|
||||||
|
{
|
||||||
|
if (round == kDefaultRoundingMode && gIsHalfRTZ)
|
||||||
|
init_info.round = round = kRoundTowardZero;
|
||||||
|
}
|
||||||
|
|
||||||
// Figure out how many elements are in a work block
|
// Figure out how many elements are in a work block
|
||||||
// we handle 64-bit types a bit differently.
|
// we handle 64-bit types a bit differently.
|
||||||
@@ -764,6 +864,10 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
|||||||
vlog("Input value: 0x%8.8x ",
|
vlog("Input value: 0x%8.8x ",
|
||||||
((unsigned int *)gIn)[error - 1]);
|
((unsigned int *)gIn)[error - 1]);
|
||||||
break;
|
break;
|
||||||
|
case khalf:
|
||||||
|
vlog("Input value: %a ",
|
||||||
|
HTF(((cl_half *)gIn)[error - 1]));
|
||||||
|
break;
|
||||||
case kfloat:
|
case kfloat:
|
||||||
vlog("Input value: %a ", ((float *)gIn)[error - 1]);
|
vlog("Input value: %a ", ((float *)gIn)[error - 1]);
|
||||||
break;
|
break;
|
||||||
@@ -901,16 +1005,6 @@ double SubtractTime(uint64_t endTime, uint64_t startTime)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count)
|
|
||||||
{
|
|
||||||
cl_uint i;
|
|
||||||
for (i = 0; i < count; ++i)
|
|
||||||
allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &ptr);
|
void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &ptr);
|
||||||
|
|
||||||
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||||
@@ -951,6 +1045,112 @@ void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
|
|||||||
// destroyed automatically soon after we exit.
|
// destroyed automatically soon after we exit.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T> static bool isnan_fp(const T &v)
|
||||||
|
{
|
||||||
|
if (std::is_same<T, cl_half>::value)
|
||||||
|
{
|
||||||
|
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||||
|
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
||||||
|
return (h_exp == 0x1F && h_mant != 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#if !defined(_WIN32)
|
||||||
|
return std::isnan(v);
|
||||||
|
#else
|
||||||
|
return _isnan(v);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename InType>
|
||||||
|
void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType)
|
||||||
|
{
|
||||||
|
InType *inp = (InType *)gIn;
|
||||||
|
for (auto j = 0; j < count; j++)
|
||||||
|
{
|
||||||
|
if (isnan_fp<InType>(inp[j]))
|
||||||
|
memset((char *)mapped + j * gTypeSizes[outType], 0,
|
||||||
|
gTypeSizes[outType]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename InType, typename OutType>
|
||||||
|
void FixNanToFltConversions(InType *inp, OutType *outp, cl_uint count)
|
||||||
|
{
|
||||||
|
if (std::is_same<OutType, cl_half>::value)
|
||||||
|
{
|
||||||
|
for (auto j = 0; j < count; j++)
|
||||||
|
if (isnan_fp(inp[j]) && isnan_fp(outp[j]))
|
||||||
|
outp[j] = 0x7e00; // HALF_NAN
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (auto j = 0; j < count; j++)
|
||||||
|
if (isnan_fp(inp[j]) && isnan_fp(outp[j])) outp[j] = NAN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void FixNanConversions(Type outType, Type inType, void *d, cl_uint count)
|
||||||
|
{
|
||||||
|
if (outType != kfloat && outType != kdouble && outType != khalf)
|
||||||
|
{
|
||||||
|
if (inType == kfloat)
|
||||||
|
ZeroNanToIntCases<float>(count, d, outType);
|
||||||
|
else if (inType == kdouble)
|
||||||
|
ZeroNanToIntCases<double>(count, d, outType);
|
||||||
|
else if (inType == khalf)
|
||||||
|
ZeroNanToIntCases<cl_half>(count, d, outType);
|
||||||
|
}
|
||||||
|
else if (inType == kfloat || inType == kdouble || inType == khalf)
|
||||||
|
{
|
||||||
|
// outtype and intype is float or double or half. NaN conversions for
|
||||||
|
// float/double/half could be any NaN
|
||||||
|
if (inType == kfloat)
|
||||||
|
{
|
||||||
|
float *inp = (float *)gIn;
|
||||||
|
if (outType == kdouble)
|
||||||
|
{
|
||||||
|
double *outp = (double *)d;
|
||||||
|
FixNanToFltConversions(inp, outp, count);
|
||||||
|
}
|
||||||
|
else if (outType == khalf)
|
||||||
|
{
|
||||||
|
cl_half *outp = (cl_half *)d;
|
||||||
|
FixNanToFltConversions(inp, outp, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (inType == kdouble)
|
||||||
|
{
|
||||||
|
double *inp = (double *)gIn;
|
||||||
|
if (outType == kfloat)
|
||||||
|
{
|
||||||
|
float *outp = (float *)d;
|
||||||
|
FixNanToFltConversions(inp, outp, count);
|
||||||
|
}
|
||||||
|
else if (outType == khalf)
|
||||||
|
{
|
||||||
|
cl_half *outp = (cl_half *)d;
|
||||||
|
FixNanToFltConversions(inp, outp, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (inType == khalf)
|
||||||
|
{
|
||||||
|
cl_half *inp = (cl_half *)gIn;
|
||||||
|
if (outType == kfloat)
|
||||||
|
{
|
||||||
|
float *outp = (float *)d;
|
||||||
|
FixNanToFltConversions(inp, outp, count);
|
||||||
|
}
|
||||||
|
else if (outType == kdouble)
|
||||||
|
{
|
||||||
|
double *outp = (double *)d;
|
||||||
|
FixNanToFltConversions(inp, outp, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||||
void *data)
|
void *data)
|
||||||
@@ -963,7 +1163,6 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
|||||||
Type outType =
|
Type outType =
|
||||||
info->parent->outType; // the data type of the conversion result
|
info->parent->outType; // the data type of the conversion result
|
||||||
Type inType = info->parent->inType; // the data type of the conversion input
|
Type inType = info->parent->inType; // the data type of the conversion input
|
||||||
size_t j;
|
|
||||||
cl_int error;
|
cl_int error;
|
||||||
cl_event doneBarrier = info->parent->doneBarrier;
|
cl_event doneBarrier = info->parent->doneBarrier;
|
||||||
|
|
||||||
@@ -985,51 +1184,7 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
|||||||
|
|
||||||
// Patch up NaNs conversions to integer to zero -- these can be converted to
|
// Patch up NaNs conversions to integer to zero -- these can be converted to
|
||||||
// any integer
|
// any integer
|
||||||
if (outType != kfloat && outType != kdouble)
|
FixNanConversions(outType, inType, mapped, count);
|
||||||
{
|
|
||||||
if (inType == kfloat)
|
|
||||||
{
|
|
||||||
float *inp = (float *)gIn;
|
|
||||||
for (j = 0; j < count; j++)
|
|
||||||
{
|
|
||||||
if (isnan(inp[j]))
|
|
||||||
memset((char *)mapped + j * gTypeSizes[outType], 0,
|
|
||||||
gTypeSizes[outType]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (inType == kdouble)
|
|
||||||
{
|
|
||||||
double *inp = (double *)gIn;
|
|
||||||
for (j = 0; j < count; j++)
|
|
||||||
{
|
|
||||||
if (isnan(inp[j]))
|
|
||||||
memset((char *)mapped + j * gTypeSizes[outType], 0,
|
|
||||||
gTypeSizes[outType]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (inType == kfloat || inType == kdouble)
|
|
||||||
{ // outtype and intype is float or double. NaN conversions for float <->
|
|
||||||
// double can be any NaN
|
|
||||||
if (inType == kfloat && outType == kdouble)
|
|
||||||
{
|
|
||||||
float *inp = (float *)gIn;
|
|
||||||
double *outp = (double *)mapped;
|
|
||||||
for (j = 0; j < count; j++)
|
|
||||||
{
|
|
||||||
if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (inType == kdouble && outType == kfloat)
|
|
||||||
{
|
|
||||||
double *inp = (double *)gIn;
|
|
||||||
float *outp = (float *)mapped;
|
|
||||||
for (j = 0; j < count; j++)
|
|
||||||
{
|
|
||||||
if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
|
if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
|
||||||
info->result =
|
info->result =
|
||||||
@@ -1077,12 +1232,8 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
|||||||
// CalcReferenceValuesComplete exit.
|
// CalcReferenceValuesComplete exit.
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
|
|
||||||
namespace conv_test {
|
namespace conv_test {
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
|
cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
|
||||||
{
|
{
|
||||||
DataInitBase *info = (DataInitBase *)p;
|
DataInitBase *info = (DataInitBase *)p;
|
||||||
@@ -1092,8 +1243,6 @@ cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
|
|||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||||
{
|
{
|
||||||
DataInitBase *info = (DataInitBase *)p;
|
DataInitBase *info = (DataInitBase *)p;
|
||||||
@@ -1102,7 +1251,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
|||||||
Type inType = info->inType;
|
Type inType = info->inType;
|
||||||
Type outType = info->outType;
|
Type outType = info->outType;
|
||||||
RoundingMode round = info->round;
|
RoundingMode round = info->round;
|
||||||
size_t j;
|
|
||||||
|
|
||||||
Force64BitFPUPrecision();
|
Force64BitFPUPrecision();
|
||||||
|
|
||||||
@@ -1110,7 +1258,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
|||||||
void *a = (cl_uchar *)gAllowZ + job_id * count;
|
void *a = (cl_uchar *)gAllowZ + job_id * count;
|
||||||
void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
|
void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
|
||||||
|
|
||||||
|
|
||||||
if (outType != inType)
|
if (outType != inType)
|
||||||
{
|
{
|
||||||
// create the reference while we wait
|
// create the reference while we wait
|
||||||
@@ -1144,7 +1291,33 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
|||||||
qcom_sat = info->sat;
|
qcom_sat = info->sat;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
RoundingMode oldRound = set_round(round, outType);
|
RoundingMode oldRound;
|
||||||
|
if (outType == khalf)
|
||||||
|
{
|
||||||
|
oldRound = set_round(kRoundToNearestEven, kfloat);
|
||||||
|
switch (round)
|
||||||
|
{
|
||||||
|
default:
|
||||||
|
case kDefaultRoundingMode:
|
||||||
|
DataInitInfo::halfRoundingMode =
|
||||||
|
ConversionsTest::defaultHalfRoundingMode;
|
||||||
|
break;
|
||||||
|
case kRoundToNearestEven:
|
||||||
|
DataInitInfo::halfRoundingMode = CL_HALF_RTE;
|
||||||
|
break;
|
||||||
|
case kRoundUp:
|
||||||
|
DataInitInfo::halfRoundingMode = CL_HALF_RTP;
|
||||||
|
break;
|
||||||
|
case kRoundDown:
|
||||||
|
DataInitInfo::halfRoundingMode = CL_HALF_RTN;
|
||||||
|
break;
|
||||||
|
case kRoundTowardZero:
|
||||||
|
DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
oldRound = set_round(round, outType);
|
||||||
|
|
||||||
if (info->sat)
|
if (info->sat)
|
||||||
info->conv_array_sat(d, s, count);
|
info->conv_array_sat(d, s, count);
|
||||||
@@ -1156,10 +1329,13 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
|||||||
// Decide if we allow a zero result in addition to the correctly rounded
|
// Decide if we allow a zero result in addition to the correctly rounded
|
||||||
// one
|
// one
|
||||||
memset(a, 0, count);
|
memset(a, 0, count);
|
||||||
if (gForceFTZ)
|
if (gForceFTZ && (inType == kfloat || outType == kfloat))
|
||||||
{
|
{
|
||||||
if (inType == kfloat || outType == kfloat)
|
info->set_allow_zero_array((uint8_t *)a, d, s, count);
|
||||||
setAllowZ((uint8_t *)a, (uint32_t *)s, count);
|
}
|
||||||
|
if (gForceHalfFTZ && (inType == khalf || outType == khalf))
|
||||||
|
{
|
||||||
|
info->set_allow_zero_array((uint8_t *)a, d, s, count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -1170,55 +1346,11 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
|||||||
|
|
||||||
// Patch up NaNs conversions to integer to zero -- these can be converted to
|
// Patch up NaNs conversions to integer to zero -- these can be converted to
|
||||||
// any integer
|
// any integer
|
||||||
if (info->outType != kfloat && info->outType != kdouble)
|
FixNanConversions(outType, inType, d, count);
|
||||||
{
|
|
||||||
if (inType == kfloat)
|
|
||||||
{
|
|
||||||
float *inp = (float *)s;
|
|
||||||
for (j = 0; j < count; j++)
|
|
||||||
{
|
|
||||||
if (isnan(inp[j]))
|
|
||||||
memset((char *)d + j * gTypeSizes[outType], 0,
|
|
||||||
gTypeSizes[outType]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (inType == kdouble)
|
|
||||||
{
|
|
||||||
double *inp = (double *)s;
|
|
||||||
for (j = 0; j < count; j++)
|
|
||||||
{
|
|
||||||
if (isnan(inp[j]))
|
|
||||||
memset((char *)d + j * gTypeSizes[outType], 0,
|
|
||||||
gTypeSizes[outType]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (inType == kfloat || inType == kdouble)
|
|
||||||
{ // outtype and intype is float or double. NaN conversions for float <->
|
|
||||||
// double can be any NaN
|
|
||||||
if (inType == kfloat && outType == kdouble)
|
|
||||||
{
|
|
||||||
float *inp = (float *)s;
|
|
||||||
for (j = 0; j < count; j++)
|
|
||||||
{
|
|
||||||
if (isnan(inp[j])) ((double *)d)[j] = NAN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (inType == kdouble && outType == kfloat)
|
|
||||||
{
|
|
||||||
double *inp = (double *)s;
|
|
||||||
for (j = 0; j < count; j++)
|
|
||||||
{
|
|
||||||
if (isnan(inp[j])) ((float *)d)[j] = NAN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
uint64_t GetTime(void)
|
uint64_t GetTime(void)
|
||||||
{
|
{
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
@@ -1233,8 +1365,6 @@ uint64_t GetTime(void)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
// Note: not called reentrantly
|
// Note: not called reentrantly
|
||||||
void WriteInputBufferComplete(void *data)
|
void WriteInputBufferComplete(void *data)
|
||||||
{
|
{
|
||||||
@@ -1295,8 +1425,6 @@ void WriteInputBufferComplete(void *data)
|
|||||||
// automatically soon after we exit.
|
// automatically soon after we exit.
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
|
cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
|
||||||
RoundingMode round, int vectorSize, cl_kernel *outKernel)
|
RoundingMode round, int vectorSize, cl_kernel *outKernel)
|
||||||
{
|
{
|
||||||
@@ -1308,6 +1436,9 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
|
|||||||
if (outType == kdouble || inType == kdouble)
|
if (outType == kdouble || inType == kdouble)
|
||||||
source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||||
|
|
||||||
|
if (outType == khalf || inType == khalf)
|
||||||
|
source << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
|
||||||
|
|
||||||
// Create the program. This is a bit complicated because we are trying to
|
// Create the program. This is a bit complicated because we are trying to
|
||||||
// avoid byte and short stores.
|
// avoid byte and short stores.
|
||||||
if (0 == vectorSize)
|
if (0 == vectorSize)
|
||||||
@@ -1408,7 +1539,7 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
|
|||||||
*outKernel = NULL;
|
*outKernel = NULL;
|
||||||
|
|
||||||
const char *flags = NULL;
|
const char *flags = NULL;
|
||||||
if (gForceFTZ) flags = "-cl-denorms-are-zero";
|
if (gForceFTZ || gForceHalfFTZ) flags = "-cl-denorms-are-zero";
|
||||||
|
|
||||||
// build it
|
// build it
|
||||||
std::string sourceString = source.str();
|
std::string sourceString = source.str();
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2023 The Khronos Group Inc.
|
// Copyright (c) 2023-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -30,6 +30,8 @@
|
|||||||
#include <CL/opencl.h>
|
#include <CL/opencl.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <CL/cl_half.h>
|
||||||
|
|
||||||
#include "harness/mt19937.h"
|
#include "harness/mt19937.h"
|
||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/typeWrappers.h"
|
#include "harness/typeWrappers.h"
|
||||||
@@ -76,6 +78,8 @@ extern cl_mem gInBuffer;
|
|||||||
extern cl_mem gOutBuffers[];
|
extern cl_mem gOutBuffers[];
|
||||||
extern int gHasDouble;
|
extern int gHasDouble;
|
||||||
extern int gTestDouble;
|
extern int gTestDouble;
|
||||||
|
extern int gHasHalfs;
|
||||||
|
extern int gTestHalfs;
|
||||||
extern int gWimpyMode;
|
extern int gWimpyMode;
|
||||||
extern int gWimpyReductionFactor;
|
extern int gWimpyReductionFactor;
|
||||||
extern int gSkipTesting;
|
extern int gSkipTesting;
|
||||||
@@ -87,6 +91,8 @@ extern int gReportAverageTimes;
|
|||||||
extern int gStartTestNumber;
|
extern int gStartTestNumber;
|
||||||
extern int gEndTestNumber;
|
extern int gEndTestNumber;
|
||||||
extern int gIsRTZ;
|
extern int gIsRTZ;
|
||||||
|
extern int gForceHalfFTZ;
|
||||||
|
extern int gIsHalfRTZ;
|
||||||
extern void *gIn;
|
extern void *gIn;
|
||||||
extern void *gRef;
|
extern void *gRef;
|
||||||
extern void *gAllowZ;
|
extern void *gAllowZ;
|
||||||
@@ -135,7 +141,7 @@ struct CalcRefValsBase
|
|||||||
cl_int result;
|
cl_int result;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
struct CalcRefValsPat : CalcRefValsBase
|
struct CalcRefValsPat : CalcRefValsBase
|
||||||
{
|
{
|
||||||
int check_result(void *, uint32_t, int) override;
|
int check_result(void *, uint32_t, int) override;
|
||||||
@@ -162,8 +168,12 @@ struct WriteInputBufferInfo
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Must be aligned with Type enums!
|
// Must be aligned with Type enums!
|
||||||
using TypeIter = std::tuple<cl_uchar, cl_char, cl_ushort, cl_short, cl_uint,
|
using TypeIter =
|
||||||
cl_int, cl_float, cl_double, cl_ulong, cl_long>;
|
std::tuple<cl_uchar, cl_char, cl_ushort, cl_short, cl_uint, cl_int, cl_half,
|
||||||
|
cl_float, cl_double, cl_ulong, cl_long>;
|
||||||
|
|
||||||
|
// hardcoded solution needed due to typeid confusing cl_ushort/cl_half
|
||||||
|
constexpr bool isTypeFp[] = { 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0 };
|
||||||
|
|
||||||
// Helper test fixture for constructing OpenCL objects used in testing
|
// Helper test fixture for constructing OpenCL objects used in testing
|
||||||
// a variety of simple command-buffer enqueue scenarios.
|
// a variety of simple command-buffer enqueue scenarios.
|
||||||
@@ -179,13 +189,13 @@ struct ConversionsTest
|
|||||||
// Test body returning an OpenCL error code
|
// Test body returning an OpenCL error code
|
||||||
cl_int Run();
|
cl_int Run();
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
int DoTest(Type outType, Type inType, SaturationMode sat,
|
int DoTest(Type outType, Type inType, SaturationMode sat,
|
||||||
RoundingMode round);
|
RoundingMode round);
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
void TestTypesConversion(const Type &inType, const Type &outType, int &tn,
|
void TestTypesConversion(const Type &inType, const Type &outType, int &tn,
|
||||||
const int smvs);
|
int startMinVectorSize);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
cl_context context;
|
cl_context context;
|
||||||
@@ -195,6 +205,9 @@ protected:
|
|||||||
size_t num_elements;
|
size_t num_elements;
|
||||||
|
|
||||||
TypeIter typeIterator;
|
TypeIter typeIterator;
|
||||||
|
|
||||||
|
public:
|
||||||
|
static cl_half_rounding_mode defaultHalfRoundingMode;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CustomConversionsTest : ConversionsTest
|
struct CustomConversionsTest : ConversionsTest
|
||||||
@@ -221,17 +234,18 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
|
|||||||
|
|
||||||
struct TestType
|
struct TestType
|
||||||
{
|
{
|
||||||
template <typename T> bool testType(Type in)
|
template <typename T, bool FP> bool testType(Type in)
|
||||||
{
|
{
|
||||||
switch (in)
|
switch (in)
|
||||||
{
|
{
|
||||||
default: return false;
|
default: return false;
|
||||||
case kuchar: return std::is_same<cl_uchar, T>::value;
|
case kuchar: return std::is_same<cl_uchar, T>::value;
|
||||||
case kchar: return std::is_same<cl_char, T>::value;
|
case kchar: return std::is_same<cl_char, T>::value;
|
||||||
case kushort: return std::is_same<cl_ushort, T>::value;
|
case kushort: return std::is_same<cl_ushort, T>::value && !FP;
|
||||||
case kshort: return std::is_same<cl_short, T>::value;
|
case kshort: return std::is_same<cl_short, T>::value;
|
||||||
case kuint: return std::is_same<cl_uint, T>::value;
|
case kuint: return std::is_same<cl_uint, T>::value;
|
||||||
case kint: return std::is_same<cl_int, T>::value;
|
case kint: return std::is_same<cl_int, T>::value;
|
||||||
|
case khalf: return std::is_same<cl_half, T>::value && FP;
|
||||||
case kfloat: return std::is_same<cl_float, T>::value;
|
case kfloat: return std::is_same<cl_float, T>::value;
|
||||||
case kdouble: return std::is_same<cl_double, T>::value;
|
case kdouble: return std::is_same<cl_double, T>::value;
|
||||||
case kulong: return std::is_same<cl_ulong, T>::value;
|
case kulong: return std::is_same<cl_ulong, T>::value;
|
||||||
@@ -263,13 +277,15 @@ protected:
|
|||||||
typename InType>
|
typename InType>
|
||||||
void iterate_in_type(const InType &t)
|
void iterate_in_type(const InType &t)
|
||||||
{
|
{
|
||||||
if (!testType<InType>(inType)) vlog_error("Unexpected data type!\n");
|
if (!testType<InType, isTypeFp[In]>(inType))
|
||||||
|
vlog_error("Unexpected data type!\n");
|
||||||
|
|
||||||
if (!testType<OutType>(outType)) vlog_error("Unexpected data type!\n");
|
if (!testType<OutType, isTypeFp[Out]>(outType))
|
||||||
|
vlog_error("Unexpected data type!\n");
|
||||||
|
|
||||||
// run the conversions
|
// run the conversions
|
||||||
test.TestTypesConversion<InType, OutType>(inType, outType, testNumber,
|
test.TestTypesConversion<InType, OutType, isTypeFp[In], isTypeFp[Out]>(
|
||||||
startMinVectorSize);
|
inType, outType, testNumber, startMinVectorSize);
|
||||||
inType = (Type)(inType + 1);
|
inType = (Type)(inType + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -337,11 +353,13 @@ protected:
|
|||||||
typename InType>
|
typename InType>
|
||||||
void iterate_in_type(const InType &t)
|
void iterate_in_type(const InType &t)
|
||||||
{
|
{
|
||||||
if (testType<InType>(inType) && testType<OutType>(outType))
|
if (testType<InType, isTypeFp[In]>(inType)
|
||||||
|
&& testType<OutType, isTypeFp[Out]>(outType))
|
||||||
{
|
{
|
||||||
// run selected conversion
|
// run selected conversion
|
||||||
// testing of the result will happen afterwards
|
// testing of the result will happen afterwards
|
||||||
test.DoTest<InType, OutType>(outType, inType, saturation, rounding);
|
test.DoTest<InType, OutType, isTypeFp[In], isTypeFp[Out]>(
|
||||||
|
outType, inType, saturation, rounding);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2023 The Khronos Group Inc.
|
// Copyright (c) 2023-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -28,8 +28,12 @@ extern bool qcom_sat;
|
|||||||
extern roundingMode qcom_rm;
|
extern roundingMode qcom_rm;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <CL/cl_half.h>
|
||||||
|
|
||||||
|
#include "harness/conversions.h"
|
||||||
#include "harness/mt19937.h"
|
#include "harness/mt19937.h"
|
||||||
#include "harness/rounding_mode.h"
|
#include "harness/rounding_mode.h"
|
||||||
|
#include "harness/typeWrappers.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -60,11 +64,17 @@ struct DataInitInfo
|
|||||||
RoundingMode round;
|
RoundingMode round;
|
||||||
cl_uint threads;
|
cl_uint threads;
|
||||||
|
|
||||||
|
static cl_half_rounding_mode halfRoundingMode;
|
||||||
static std::vector<uint32_t> specialValuesUInt;
|
static std::vector<uint32_t> specialValuesUInt;
|
||||||
static std::vector<float> specialValuesFloat;
|
static std::vector<float> specialValuesFloat;
|
||||||
static std::vector<double> specialValuesDouble;
|
static std::vector<double> specialValuesDouble;
|
||||||
|
static std::vector<cl_half> specialValuesHalf;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define HFF(num) cl_half_from_float(num, DataInitInfo::halfRoundingMode)
|
||||||
|
#define HTF(num) cl_half_to_float(num)
|
||||||
|
#define HFD(num) cl_half_from_double(num, DataInitInfo::halfRoundingMode)
|
||||||
|
|
||||||
struct DataInitBase : public DataInitInfo
|
struct DataInitBase : public DataInitInfo
|
||||||
{
|
{
|
||||||
virtual ~DataInitBase() = default;
|
virtual ~DataInitBase() = default;
|
||||||
@@ -73,9 +83,12 @@ struct DataInitBase : public DataInitInfo
|
|||||||
virtual void conv_array(void *out, void *in, size_t n) {}
|
virtual void conv_array(void *out, void *in, size_t n) {}
|
||||||
virtual void conv_array_sat(void *out, void *in, size_t n) {}
|
virtual void conv_array_sat(void *out, void *in, size_t n) {}
|
||||||
virtual void init(const cl_uint &, const cl_uint &) {}
|
virtual void init(const cl_uint &, const cl_uint &) {}
|
||||||
|
virtual void set_allow_zero_array(uint8_t *allow, void *out, void *in,
|
||||||
|
size_t n)
|
||||||
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
struct DataInfoSpec : public DataInitBase
|
struct DataInfoSpec : public DataInitBase
|
||||||
{
|
{
|
||||||
explicit DataInfoSpec(const DataInitInfo &agg);
|
explicit DataInfoSpec(const DataInitInfo &agg);
|
||||||
@@ -90,6 +103,9 @@ struct DataInfoSpec : public DataInitBase
|
|||||||
void conv(OutType *out, InType *in);
|
void conv(OutType *out, InType *in);
|
||||||
void conv_sat(OutType *out, InType *in);
|
void conv_sat(OutType *out, InType *in);
|
||||||
|
|
||||||
|
// Decide if we allow a zero result in addition to the correctly rounded one
|
||||||
|
void set_allow_zero(uint8_t *allow, OutType *out, InType *in);
|
||||||
|
|
||||||
// min/max ranges for output type of data
|
// min/max ranges for output type of data
|
||||||
std::pair<OutType, OutType> ranges;
|
std::pair<OutType, OutType> ranges;
|
||||||
|
|
||||||
@@ -98,6 +114,16 @@ struct DataInfoSpec : public DataInitBase
|
|||||||
|
|
||||||
std::vector<MTdataHolder> mdv;
|
std::vector<MTdataHolder> mdv;
|
||||||
|
|
||||||
|
constexpr bool is_in_half() const
|
||||||
|
{
|
||||||
|
return (std::is_same<InType, cl_half>::value && InFP);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr bool is_out_half() const
|
||||||
|
{
|
||||||
|
return (std::is_same<OutType, cl_half>::value && OutFP);
|
||||||
|
}
|
||||||
|
|
||||||
void conv_array(void *out, void *in, size_t n) override
|
void conv_array(void *out, void *in, size_t n) override
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < n; i++)
|
for (size_t i = 0; i < n; i++)
|
||||||
@@ -111,6 +137,12 @@ struct DataInfoSpec : public DataInitBase
|
|||||||
}
|
}
|
||||||
|
|
||||||
void init(const cl_uint &, const cl_uint &) override;
|
void init(const cl_uint &, const cl_uint &) override;
|
||||||
|
void set_allow_zero_array(uint8_t *allow, void *out, void *in,
|
||||||
|
size_t n) override
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < n; i++)
|
||||||
|
set_allow_zero(&allow[i], &((OutType *)out)[i], &((InType *)in)[i]);
|
||||||
|
}
|
||||||
InType clamp(const InType &);
|
InType clamp(const InType &);
|
||||||
inline float fclamp(float lo, float v, float hi)
|
inline float fclamp(float lo, float v, float hi)
|
||||||
{
|
{
|
||||||
@@ -125,19 +157,22 @@ struct DataInfoSpec : public DataInitBase
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
|
DataInfoSpec<InType, OutType, InFP, OutFP>::DataInfoSpec(
|
||||||
|
const DataInitInfo &agg)
|
||||||
: DataInitBase(agg), mdv(0)
|
: DataInitBase(agg), mdv(0)
|
||||||
{
|
{
|
||||||
if (std::is_same<cl_float, OutType>::value)
|
if (std::is_same<cl_float, OutType>::value)
|
||||||
ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX);
|
ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX);
|
||||||
else if (std::is_same<cl_double, OutType>::value)
|
else if (std::is_same<cl_double, OutType>::value)
|
||||||
ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX);
|
ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX);
|
||||||
|
else if (std::is_same<cl_half, OutType>::value && OutFP)
|
||||||
|
ranges = std::make_pair(HFF(CL_HALF_MIN), HFF(CL_HALF_MAX));
|
||||||
else if (std::is_same<cl_uchar, OutType>::value)
|
else if (std::is_same<cl_uchar, OutType>::value)
|
||||||
ranges = std::make_pair(0, CL_UCHAR_MAX);
|
ranges = std::make_pair(0, CL_UCHAR_MAX);
|
||||||
else if (std::is_same<cl_char, OutType>::value)
|
else if (std::is_same<cl_char, OutType>::value)
|
||||||
ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX);
|
ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX);
|
||||||
else if (std::is_same<cl_ushort, OutType>::value)
|
else if (std::is_same<cl_ushort, OutType>::value && !OutFP)
|
||||||
ranges = std::make_pair(0, CL_USHRT_MAX);
|
ranges = std::make_pair(0, CL_USHRT_MAX);
|
||||||
else if (std::is_same<cl_short, OutType>::value)
|
else if (std::is_same<cl_short, OutType>::value)
|
||||||
ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX);
|
ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX);
|
||||||
@@ -158,12 +193,12 @@ DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
|
|||||||
InType outMax = static_cast<InType>(ranges.second);
|
InType outMax = static_cast<InType>(ranges.second);
|
||||||
|
|
||||||
InType eps = std::is_same<InType, cl_float>::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON;
|
InType eps = std::is_same<InType, cl_float>::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON;
|
||||||
if (std::is_integral<OutType>::value)
|
if (std::is_integral<OutType>::value && !OutFP)
|
||||||
{ // to char/uchar/short/ushort/int/uint/long/ulong
|
{ // to char/uchar/short/ushort/int/uint/long/ulong
|
||||||
if (sizeof(OutType)<=sizeof(cl_short))
|
if (sizeof(OutType)<=sizeof(cl_short))
|
||||||
{ // to char/uchar/short/ushort
|
{ // to char/uchar/short/ushort
|
||||||
clamp_ranges=
|
clamp_ranges=
|
||||||
{{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
|
{{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
|
||||||
{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
|
{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
|
||||||
{outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax-1.f},
|
{outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax-1.f},
|
||||||
{outMin-0.0f, outMax - outMax * 0.5f * eps },
|
{outMin-0.0f, outMax - outMax * 0.5f * eps },
|
||||||
@@ -249,11 +284,55 @@ DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (is_in_half())
|
||||||
|
{
|
||||||
|
float outMin = static_cast<float>(ranges.first);
|
||||||
|
float outMax = static_cast<float>(ranges.second);
|
||||||
|
float eps = CL_HALF_EPSILON;
|
||||||
|
cl_half_rounding_mode prev_half_round = DataInitInfo::halfRoundingMode;
|
||||||
|
DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
|
||||||
|
|
||||||
|
if (std::is_integral<OutType>::value)
|
||||||
|
{ // to char/uchar/short/ushort/int/uint/long/ulong
|
||||||
|
if (sizeof(OutType)<=sizeof(cl_char) || std::is_same<OutType, cl_short>::value)
|
||||||
|
{ // to char/uchar
|
||||||
|
clamp_ranges=
|
||||||
|
{{HFF(outMin-0.5f), HFF(outMax + 0.5f - outMax * 0.5f * eps)},
|
||||||
|
{HFF(outMin-0.5f), HFF(outMax + 0.5f - outMax * 0.5f * eps)},
|
||||||
|
{HFF(outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps), HFF(outMax-1.f)},
|
||||||
|
{HFF(outMin-0.0f), HFF(outMax - outMax * 0.5f * eps) },
|
||||||
|
{HFF(outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps), HFF(outMax - outMax * 0.5f * eps)}};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ // to ushort/int/uint/long/ulong
|
||||||
|
if (std::is_signed<OutType>::value)
|
||||||
|
{
|
||||||
|
clamp_ranges=
|
||||||
|
{ {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
||||||
|
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
||||||
|
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
||||||
|
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
||||||
|
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)}};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
clamp_ranges=
|
||||||
|
{ {HFF(outMin), HFF(CL_HALF_MAX)},
|
||||||
|
{HFF(outMin), HFF(CL_HALF_MAX)},
|
||||||
|
{HFF(outMin), HFF(CL_HALF_MAX)},
|
||||||
|
{HFF(outMin), HFF(CL_HALF_MAX)},
|
||||||
|
{HFF(outMin), HFF(CL_HALF_MAX)}};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DataInitInfo::halfRoundingMode = prev_half_round;
|
||||||
|
}
|
||||||
// clang-format on
|
// clang-format on
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
float DataInfoSpec<InType, OutType>::round_to_int(float f)
|
float DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int(float f)
|
||||||
{
|
{
|
||||||
static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23),
|
static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23),
|
||||||
-MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) };
|
-MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) };
|
||||||
@@ -281,8 +360,9 @@ float DataInfoSpec<InType, OutType>::round_to_int(float f)
|
|||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
long long DataInfoSpec<InType, OutType>::round_to_int_and_clamp(double f)
|
long long
|
||||||
|
DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int_and_clamp(double f)
|
||||||
{
|
{
|
||||||
static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52),
|
static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52),
|
||||||
MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
|
MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
|
||||||
@@ -313,8 +393,8 @@ long long DataInfoSpec<InType, OutType>::round_to_int_and_clamp(double f)
|
|||||||
return (long long)f;
|
return (long long)f;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
OutType DataInfoSpec<InType, OutType>::absolute(const OutType &x)
|
OutType DataInfoSpec<InType, OutType, InFP, OutFP>::absolute(const OutType &x)
|
||||||
{
|
{
|
||||||
union {
|
union {
|
||||||
cl_uint u;
|
cl_uint u;
|
||||||
@@ -331,17 +411,30 @@ OutType DataInfoSpec<InType, OutType>::absolute(const OutType &x)
|
|||||||
return u.f;
|
return u.f;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename T, bool fp> constexpr bool is_half()
|
||||||
void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
|
||||||
{
|
{
|
||||||
if (std::is_same<cl_float, InType>::value)
|
return (std::is_same<cl_half, T>::value && fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
|
void DataInfoSpec<InType, OutType, InFP, OutFP>::conv(OutType *out, InType *in)
|
||||||
|
{
|
||||||
|
if (std::is_same<cl_float, InType>::value || is_in_half())
|
||||||
{
|
{
|
||||||
cl_float inVal = *in;
|
cl_float inVal = *in;
|
||||||
|
if (std::is_same<cl_half, InType>::value)
|
||||||
|
{
|
||||||
|
inVal = HTF(*in);
|
||||||
|
}
|
||||||
|
|
||||||
if (std::is_floating_point<OutType>::value)
|
if (std::is_floating_point<OutType>::value)
|
||||||
{
|
{
|
||||||
*out = (OutType)inVal;
|
*out = (OutType)inVal;
|
||||||
}
|
}
|
||||||
|
else if (is_out_half())
|
||||||
|
{
|
||||||
|
*out = HFF(*in);
|
||||||
|
}
|
||||||
else if (std::is_same<cl_ulong, OutType>::value)
|
else if (std::is_same<cl_ulong, OutType>::value)
|
||||||
{
|
{
|
||||||
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
||||||
@@ -376,6 +469,8 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
|||||||
{
|
{
|
||||||
if (std::is_same<cl_float, OutType>::value)
|
if (std::is_same<cl_float, OutType>::value)
|
||||||
*out = (OutType)*in;
|
*out = (OutType)*in;
|
||||||
|
else if (is_out_half())
|
||||||
|
*out = static_cast<OutType>(HFD(*in));
|
||||||
else
|
else
|
||||||
*out = rint(*in);
|
*out = rint(*in);
|
||||||
}
|
}
|
||||||
@@ -418,7 +513,7 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
|||||||
*out = (vi == 0 ? 0.0 : static_cast<OutType>(vi));
|
*out = (vi == 0 ? 0.0 : static_cast<OutType>(vi));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else if (std::is_same<cl_float, OutType>::value)
|
else if (std::is_same<cl_float, OutType>::value || is_out_half())
|
||||||
{
|
{
|
||||||
cl_float outVal = 0.f;
|
cl_float outVal = 0.f;
|
||||||
|
|
||||||
@@ -465,7 +560,9 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
*out = outVal;
|
*out = std::is_same<cl_half, OutType>::value
|
||||||
|
? static_cast<OutType>(HFF(outVal))
|
||||||
|
: outVal;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -486,6 +583,8 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
|||||||
// Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
|
// Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
|
||||||
*out = (*in == 0 ? 0.0 : *in);
|
*out = (*in == 0 ? 0.0 : *in);
|
||||||
}
|
}
|
||||||
|
else if (is_out_half())
|
||||||
|
*out = static_cast<OutType>(HFF(*in == 0 ? 0.f : *in));
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*out = (OutType)*in;
|
*out = (OutType)*in;
|
||||||
@@ -496,19 +595,26 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
|||||||
#define CLAMP(_lo, _x, _hi) \
|
#define CLAMP(_lo, _x, _hi) \
|
||||||
((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x)))
|
((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x)))
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
|
void DataInfoSpec<InType, OutType, InFP, OutFP>::conv_sat(OutType *out,
|
||||||
|
InType *in)
|
||||||
{
|
{
|
||||||
if (std::is_floating_point<InType>::value)
|
if (std::is_floating_point<InType>::value || is_in_half())
|
||||||
{
|
{
|
||||||
if (std::is_floating_point<OutType>::value)
|
cl_float inVal = *in;
|
||||||
{ // in float/double, out float/double
|
if (is_in_half()) inVal = HTF(*in);
|
||||||
*out = (OutType)(*in);
|
|
||||||
|
if (std::is_floating_point<OutType>::value || is_out_half())
|
||||||
|
{ // in half/float/double, out half/float/double
|
||||||
|
if (is_out_half())
|
||||||
|
*out = static_cast<OutType>(HFF(inVal));
|
||||||
|
else
|
||||||
|
*out = (OutType)(is_in_half() ? inVal : *in);
|
||||||
}
|
}
|
||||||
else if ((std::is_same<InType, cl_float>::value)
|
else if ((std::is_same<InType, cl_float>::value || is_in_half())
|
||||||
&& std::is_same<cl_ulong, OutType>::value)
|
&& std::is_same<cl_ulong, OutType>::value)
|
||||||
{
|
{
|
||||||
cl_float x = round_to_int(*in);
|
cl_float x = round_to_int(is_in_half() ? HTF(*in) : *in);
|
||||||
|
|
||||||
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
||||||
// VS2005 (at least) on x86 uses fistp to store the float as a
|
// VS2005 (at least) on x86 uses fistp to store the float as a
|
||||||
@@ -536,47 +642,57 @@ void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
*out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
|
*out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
|
||||||
? 0xFFFFFFFFFFFFFFFFULL
|
? (OutType)0xFFFFFFFFFFFFFFFFULL
|
||||||
: x < 0 ? 0 : (OutType)x;
|
: x < 0 ? 0
|
||||||
|
: (OutType)x;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else if ((std::is_same<InType, cl_float>::value)
|
else if ((std::is_same<InType, cl_float>::value || is_in_half())
|
||||||
&& std::is_same<cl_long, OutType>::value)
|
&& std::is_same<cl_long, OutType>::value)
|
||||||
{
|
{
|
||||||
cl_float f = round_to_int(*in);
|
cl_float f = round_to_int(is_in_half() ? HTF(*in) : *in);
|
||||||
*out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
|
*out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
|
||||||
? 0x7FFFFFFFFFFFFFFFULL
|
? (OutType)0x7FFFFFFFFFFFFFFFULL
|
||||||
: f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
|
: f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
|
||||||
? 0x8000000000000000LL
|
? (OutType)0x8000000000000000LL
|
||||||
: (OutType)f;
|
: (OutType)f;
|
||||||
}
|
}
|
||||||
else if (std::is_same<InType, cl_double>::value
|
else if (std::is_same<InType, cl_double>::value
|
||||||
&& std::is_same<cl_ulong, OutType>::value)
|
&& std::is_same<cl_ulong, OutType>::value)
|
||||||
{
|
{
|
||||||
InType f = rint(*in);
|
InType f = rint(*in);
|
||||||
*out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
|
*out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
|
||||||
? 0xFFFFFFFFFFFFFFFFULL
|
? (OutType)0xFFFFFFFFFFFFFFFFULL
|
||||||
: f < 0 ? 0 : (OutType)f;
|
: f < 0 ? 0
|
||||||
|
: (OutType)f;
|
||||||
}
|
}
|
||||||
else if (std::is_same<InType, cl_double>::value
|
else if (std::is_same<InType, cl_double>::value
|
||||||
&& std::is_same<cl_long, OutType>::value)
|
&& std::is_same<cl_long, OutType>::value)
|
||||||
{
|
{
|
||||||
InType f = rint(*in);
|
InType f = rint(*in);
|
||||||
*out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
|
*out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
|
||||||
? 0x7FFFFFFFFFFFFFFFULL
|
? (OutType)0x7FFFFFFFFFFFFFFFULL
|
||||||
: f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
|
: f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
|
||||||
? 0x8000000000000000LL
|
? (OutType)0x8000000000000000LL
|
||||||
: (OutType)f;
|
: (OutType)f;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{ // in float/double, out char/uchar/short/ushort/int/uint
|
{ // in half/float/double, out char/uchar/short/ushort/int/uint
|
||||||
*out =
|
*out = CLAMP(ranges.first,
|
||||||
CLAMP(ranges.first, round_to_int_and_clamp(*in), ranges.second);
|
round_to_int_and_clamp(is_in_half() ? inVal : *in),
|
||||||
|
ranges.second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (std::is_integral<InType>::value
|
else if (std::is_integral<InType>::value
|
||||||
&& std::is_integral<OutType>::value)
|
&& std::is_integral<OutType>::value)
|
||||||
{
|
{
|
||||||
|
if (is_out_half())
|
||||||
|
{
|
||||||
|
*out = std::is_signed<InType>::value
|
||||||
|
? static_cast<OutType>(HFF((cl_float)*in))
|
||||||
|
: absolute(static_cast<OutType>(HFF((cl_float)*in)));
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
if ((std::is_signed<InType>::value
|
if ((std::is_signed<InType>::value
|
||||||
&& std::is_signed<OutType>::value)
|
&& std::is_signed<OutType>::value)
|
||||||
@@ -614,14 +730,64 @@ void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
void DataInfoSpec<InType, OutType>::init(const cl_uint &job_id,
|
void DataInfoSpec<InType, OutType, InFP, OutFP>::set_allow_zero(uint8_t *allow,
|
||||||
const cl_uint &thread_id)
|
OutType *out,
|
||||||
|
InType *in)
|
||||||
|
{
|
||||||
|
// from double
|
||||||
|
if (std::is_same<InType, cl_double>::value)
|
||||||
|
*allow |= IsDoubleSubnormal(*in);
|
||||||
|
// from float
|
||||||
|
if (std::is_same<InType, cl_float>::value) *allow |= IsFloatSubnormal(*in);
|
||||||
|
// from half
|
||||||
|
if (is_in_half()) *allow |= IsHalfSubnormal(*in);
|
||||||
|
|
||||||
|
// handle the cases that the converted result is subnormal
|
||||||
|
// from double
|
||||||
|
if (std::is_same<OutType, cl_double>::value)
|
||||||
|
*allow |= IsDoubleSubnormal(*out);
|
||||||
|
// from float
|
||||||
|
if (std::is_same<OutType, cl_float>::value)
|
||||||
|
*allow |= IsFloatSubnormal(*out);
|
||||||
|
// from half
|
||||||
|
if (is_out_half()) *allow |= IsHalfSubnormal(*out);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
|
void DataInfoSpec<InType, OutType, InFP, OutFP>::init(const cl_uint &job_id,
|
||||||
|
const cl_uint &thread_id)
|
||||||
{
|
{
|
||||||
uint64_t ulStart = start;
|
uint64_t ulStart = start;
|
||||||
void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType];
|
void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType];
|
||||||
|
|
||||||
if (std::is_integral<InType>::value)
|
if (is_in_half())
|
||||||
|
{
|
||||||
|
cl_half *o = (cl_half *)pIn;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (gIsEmbedded)
|
||||||
|
for (i = 0; i < size; i++)
|
||||||
|
o[i] = (cl_half)genrand_int32(mdv[thread_id]);
|
||||||
|
else
|
||||||
|
for (i = 0; i < size; i++) o[i] = (cl_half)((i + ulStart) % 0xffff);
|
||||||
|
|
||||||
|
if (0 == ulStart)
|
||||||
|
{
|
||||||
|
size_t tableSize = specialValuesHalf.size()
|
||||||
|
* sizeof(decltype(specialValuesHalf)::value_type);
|
||||||
|
if (sizeof(InType) * size < tableSize)
|
||||||
|
tableSize = sizeof(InType) * size;
|
||||||
|
memcpy((char *)(o + i) - tableSize, &specialValuesHalf.front(),
|
||||||
|
tableSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (kUnsaturated == sat)
|
||||||
|
{
|
||||||
|
for (i = 0; i < size; i++) o[i] = clamp(o[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (std::is_integral<InType>::value)
|
||||||
{
|
{
|
||||||
InType *o = (InType *)pIn;
|
InType *o = (InType *)pIn;
|
||||||
if (sizeof(InType) <= sizeof(cl_short))
|
if (sizeof(InType) <= sizeof(cl_short))
|
||||||
@@ -776,10 +942,10 @@ void DataInfoSpec<InType, OutType>::init(const cl_uint &job_id,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename InType, typename OutType>
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||||
InType DataInfoSpec<InType, OutType>::clamp(const InType &in)
|
InType DataInfoSpec<InType, OutType, InFP, OutFP>::clamp(const InType &in)
|
||||||
{
|
{
|
||||||
if (std::is_integral<OutType>::value)
|
if (std::is_integral<OutType>::value && !OutFP)
|
||||||
{
|
{
|
||||||
if (std::is_same<InType, cl_float>::value)
|
if (std::is_same<InType, cl_float>::value)
|
||||||
{
|
{
|
||||||
@@ -791,6 +957,11 @@ InType DataInfoSpec<InType, OutType>::clamp(const InType &in)
|
|||||||
return dclamp(clamp_ranges[round].first, in,
|
return dclamp(clamp_ranges[round].first, in,
|
||||||
clamp_ranges[round].second);
|
clamp_ranges[round].second);
|
||||||
}
|
}
|
||||||
|
else if (std::is_same<InType, cl_half>::value && InFP)
|
||||||
|
{
|
||||||
|
return HFF(fclamp(HTF(clamp_ranges[round].first), HTF(in),
|
||||||
|
HTF(clamp_ranges[round].second)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return in;
|
return in;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -73,9 +73,9 @@ static void PrintUsage(void);
|
|||||||
test_status InitCL(cl_device_id device);
|
test_status InitCL(cl_device_id device);
|
||||||
|
|
||||||
|
|
||||||
const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short",
|
const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short",
|
||||||
"uint", "int", "float", "double",
|
"uint", "int", "half", "float",
|
||||||
"ulong", "long" };
|
"double", "ulong", "long" };
|
||||||
|
|
||||||
const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp",
|
const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp",
|
||||||
"_rtn", "_rtz" };
|
"_rtn", "_rtz" };
|
||||||
@@ -83,9 +83,9 @@ const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp",
|
|||||||
const char *gSaturationNames[2] = { "", "_sat" };
|
const char *gSaturationNames[2] = { "", "_sat" };
|
||||||
|
|
||||||
size_t gTypeSizes[kTypeCount] = {
|
size_t gTypeSizes[kTypeCount] = {
|
||||||
sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short),
|
sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short),
|
||||||
sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), sizeof(cl_double),
|
sizeof(cl_uint), sizeof(cl_int), sizeof(cl_half), sizeof(cl_float),
|
||||||
sizeof(cl_ulong), sizeof(cl_long),
|
sizeof(cl_double), sizeof(cl_ulong), sizeof(cl_long),
|
||||||
};
|
};
|
||||||
|
|
||||||
char appName[64] = "ctest";
|
char appName[64] = "ctest";
|
||||||
@@ -221,13 +221,17 @@ static int ParseArgs(int argc, const char **argv)
|
|||||||
switch (*arg)
|
switch (*arg)
|
||||||
{
|
{
|
||||||
case 'd': gTestDouble ^= 1; break;
|
case 'd': gTestDouble ^= 1; break;
|
||||||
|
case 'h': gTestHalfs ^= 1; break;
|
||||||
case 'l': gSkipTesting ^= 1; break;
|
case 'l': gSkipTesting ^= 1; break;
|
||||||
case 'm': gMultithread ^= 1; break;
|
case 'm': gMultithread ^= 1; break;
|
||||||
case 'w': gWimpyMode ^= 1; break;
|
case 'w': gWimpyMode ^= 1; break;
|
||||||
case '[':
|
case '[':
|
||||||
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
|
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
|
||||||
break;
|
break;
|
||||||
case 'z': gForceFTZ ^= 1; break;
|
case 'z':
|
||||||
|
gForceFTZ ^= 1;
|
||||||
|
gForceHalfFTZ ^= 1;
|
||||||
|
break;
|
||||||
case 't': gTimeResults ^= 1; break;
|
case 't': gTimeResults ^= 1; break;
|
||||||
case 'a': gReportAverageTimes ^= 1; break;
|
case 'a': gReportAverageTimes ^= 1; break;
|
||||||
case '1':
|
case '1':
|
||||||
@@ -355,7 +359,6 @@ static void PrintUsage(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
test_status InitCL(cl_device_id device)
|
test_status InitCL(cl_device_id device)
|
||||||
{
|
{
|
||||||
int error, i;
|
int error, i;
|
||||||
@@ -412,6 +415,50 @@ test_status InitCL(cl_device_id device)
|
|||||||
}
|
}
|
||||||
gTestDouble &= gHasDouble;
|
gTestDouble &= gHasDouble;
|
||||||
|
|
||||||
|
if (is_extension_available(device, "cl_khr_fp16"))
|
||||||
|
{
|
||||||
|
gHasHalfs = 1;
|
||||||
|
|
||||||
|
cl_device_fp_config floatCapabilities = 0;
|
||||||
|
if ((error = clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG,
|
||||||
|
sizeof(floatCapabilities),
|
||||||
|
&floatCapabilities, NULL)))
|
||||||
|
floatCapabilities = 0;
|
||||||
|
|
||||||
|
if (0 == (CL_FP_DENORM & floatCapabilities)) gForceHalfFTZ ^= 1;
|
||||||
|
|
||||||
|
if (0 == (floatCapabilities & CL_FP_ROUND_TO_NEAREST))
|
||||||
|
{
|
||||||
|
char profileStr[128] = "";
|
||||||
|
// Verify that we are an embedded profile device
|
||||||
|
if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE,
|
||||||
|
sizeof(profileStr), profileStr, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILURE: Could not get device profile: error %d\n",
|
||||||
|
error);
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strcmp(profileStr, "EMBEDDED_PROFILE"))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"FAILURE: non-embedded profile device does not support "
|
||||||
|
"CL_FP_ROUND_TO_NEAREST\n");
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == (floatCapabilities & CL_FP_ROUND_TO_ZERO))
|
||||||
|
{
|
||||||
|
vlog_error("FAILURE: embedded profile device supports neither "
|
||||||
|
"CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n");
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
|
gIsHalfRTZ = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gTestHalfs &= gHasHalfs;
|
||||||
|
|
||||||
// detect whether profile of the device is embedded
|
// detect whether profile of the device is embedded
|
||||||
char profile[1024] = "";
|
char profile[1024] = "";
|
||||||
if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile),
|
if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile),
|
||||||
@@ -492,8 +539,12 @@ test_status InitCL(cl_device_id device)
|
|||||||
vlog("\tSubnormal values supported for floats? %s\n",
|
vlog("\tSubnormal values supported for floats? %s\n",
|
||||||
no_yes[0 != (CL_FP_DENORM & floatCapabilities)]);
|
no_yes[0 != (CL_FP_DENORM & floatCapabilities)]);
|
||||||
vlog("\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ]);
|
vlog("\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ]);
|
||||||
|
vlog("\tTesting with FTZ mode ON for halfs? %s\n",
|
||||||
|
no_yes[0 != gForceHalfFTZ]);
|
||||||
vlog("\tTesting with default RTZ mode for floats? %s\n",
|
vlog("\tTesting with default RTZ mode for floats? %s\n",
|
||||||
no_yes[0 != gIsRTZ]);
|
no_yes[0 != gIsRTZ]);
|
||||||
|
vlog("\tTesting with default RTZ mode for halfs? %s\n",
|
||||||
|
no_yes[0 != gIsHalfRTZ]);
|
||||||
vlog("\tHas Double? %s\n", no_yes[0 != gHasDouble]);
|
vlog("\tHas Double? %s\n", no_yes[0 != gHasDouble]);
|
||||||
if (gHasDouble) vlog("\tTest Double? %s\n", no_yes[0 != gTestDouble]);
|
if (gHasDouble) vlog("\tTest Double? %s\n", no_yes[0 != gTestDouble]);
|
||||||
vlog("\tHas Long? %s\n", no_yes[0 != gHasLong]);
|
vlog("\tHas Long? %s\n", no_yes[0 != gHasLong]);
|
||||||
@@ -503,5 +554,3 @@ test_status InitCL(cl_device_id device)
|
|||||||
vlog("\n");
|
vlog("\n");
|
||||||
return TEST_PASS;
|
return TEST_PASS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -3,24 +3,32 @@ set(MODULE_NAME BRUTEFORCE)
|
|||||||
set(${MODULE_NAME}_SOURCES
|
set(${MODULE_NAME}_SOURCES
|
||||||
binary_double.cpp
|
binary_double.cpp
|
||||||
binary_float.cpp
|
binary_float.cpp
|
||||||
|
binary_half.cpp
|
||||||
binary_i_double.cpp
|
binary_i_double.cpp
|
||||||
binary_i_float.cpp
|
binary_i_float.cpp
|
||||||
|
binary_i_half.cpp
|
||||||
binary_operator_double.cpp
|
binary_operator_double.cpp
|
||||||
binary_operator_float.cpp
|
binary_operator_float.cpp
|
||||||
|
binary_operator_half.cpp
|
||||||
binary_two_results_i_double.cpp
|
binary_two_results_i_double.cpp
|
||||||
binary_two_results_i_float.cpp
|
binary_two_results_i_float.cpp
|
||||||
|
binary_two_results_i_half.cpp
|
||||||
common.cpp
|
common.cpp
|
||||||
common.h
|
common.h
|
||||||
function_list.cpp
|
function_list.cpp
|
||||||
function_list.h
|
function_list.h
|
||||||
i_unary_double.cpp
|
i_unary_double.cpp
|
||||||
i_unary_float.cpp
|
i_unary_float.cpp
|
||||||
|
i_unary_half.cpp
|
||||||
macro_binary_double.cpp
|
macro_binary_double.cpp
|
||||||
macro_binary_float.cpp
|
macro_binary_float.cpp
|
||||||
|
macro_binary_half.cpp
|
||||||
macro_unary_double.cpp
|
macro_unary_double.cpp
|
||||||
macro_unary_float.cpp
|
macro_unary_float.cpp
|
||||||
|
macro_unary_half.cpp
|
||||||
mad_double.cpp
|
mad_double.cpp
|
||||||
mad_float.cpp
|
mad_float.cpp
|
||||||
|
mad_half.cpp
|
||||||
main.cpp
|
main.cpp
|
||||||
reference_math.cpp
|
reference_math.cpp
|
||||||
reference_math.h
|
reference_math.h
|
||||||
@@ -28,15 +36,20 @@ set(${MODULE_NAME}_SOURCES
|
|||||||
sleep.h
|
sleep.h
|
||||||
ternary_double.cpp
|
ternary_double.cpp
|
||||||
ternary_float.cpp
|
ternary_float.cpp
|
||||||
|
ternary_half.cpp
|
||||||
test_functions.h
|
test_functions.h
|
||||||
unary_double.cpp
|
unary_double.cpp
|
||||||
unary_float.cpp
|
unary_float.cpp
|
||||||
|
unary_half.cpp
|
||||||
unary_two_results_double.cpp
|
unary_two_results_double.cpp
|
||||||
unary_two_results_float.cpp
|
unary_two_results_float.cpp
|
||||||
|
unary_two_results_half.cpp
|
||||||
unary_two_results_i_double.cpp
|
unary_two_results_i_double.cpp
|
||||||
unary_two_results_i_float.cpp
|
unary_two_results_i_float.cpp
|
||||||
|
unary_two_results_i_half.cpp
|
||||||
unary_u_double.cpp
|
unary_u_double.cpp
|
||||||
unary_u_float.cpp
|
unary_u_float.cpp
|
||||||
|
unary_u_half.cpp
|
||||||
utility.cpp
|
utility.cpp
|
||||||
utility.h
|
utility.h
|
||||||
)
|
)
|
||||||
|
|||||||
784
test_conformance/math_brute_force/binary_half.cpp
Normal file
784
test_conformance/math_brute_force/binary_half.cpp
Normal file
@@ -0,0 +1,784 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "harness/errorHelpers.h"
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
#include "reference_math.h"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||||
|
ParameterType::Half, ParameterType::Half,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Thread specific data for a worker thread
|
||||||
|
struct ThreadInfo
|
||||||
|
{
|
||||||
|
clMemWrapper inBuf; // input buffer for the thread
|
||||||
|
clMemWrapper inBuf2; // input buffer for the thread
|
||||||
|
clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||||
|
float maxError; // max error value. Init to 0.
|
||||||
|
double
|
||||||
|
maxErrorValue; // position of the max error value (param 1). Init to 0.
|
||||||
|
double maxErrorValue2; // position of the max error value (param 2). Init
|
||||||
|
// to 0.
|
||||||
|
MTdataHolder d;
|
||||||
|
|
||||||
|
clCommandQueueWrapper
|
||||||
|
tQueue; // per thread command queue to improve performance
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TestInfoBase
|
||||||
|
{
|
||||||
|
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||||
|
const Func *f; // A pointer to the function info
|
||||||
|
|
||||||
|
cl_uint threadCount; // Number of worker threads
|
||||||
|
cl_uint jobCount; // Number of jobs
|
||||||
|
cl_uint step; // step between each chunk and the next.
|
||||||
|
cl_uint scale; // stride between individual test values
|
||||||
|
float ulps; // max_allowed ulps
|
||||||
|
int ftz; // non-zero if running in flush to zero mode
|
||||||
|
|
||||||
|
int isFDim;
|
||||||
|
int skipNanInf;
|
||||||
|
int isNextafter;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TestInfo : public TestInfoBase
|
||||||
|
{
|
||||||
|
TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
|
||||||
|
|
||||||
|
// Array of thread specific information
|
||||||
|
std::vector<ThreadInfo> tinfo;
|
||||||
|
|
||||||
|
// Programs for various vector sizes.
|
||||||
|
Programs programs;
|
||||||
|
|
||||||
|
// Thread-specific kernels for each vector size:
|
||||||
|
// k[vector_size][thread_id]
|
||||||
|
KernelMatrix k;
|
||||||
|
};
|
||||||
|
|
||||||
|
// A table of more difficult cases to get right
|
||||||
|
const cl_half specialValuesHalf[] = {
|
||||||
|
0xffff, 0x0000, 0x0001, 0x7c00, /*INFINITY*/
|
||||||
|
0xfc00, /*-INFINITY*/
|
||||||
|
0x8000, /*-0*/
|
||||||
|
0x7bff, /*HALF_MAX*/
|
||||||
|
0x0400, /*HALF_MIN*/
|
||||||
|
0x03ff, /* Largest denormal */
|
||||||
|
0x3c00, /* 1 */
|
||||||
|
0xbc00, /* -1 */
|
||||||
|
0x3555, /*nearest value to 1/3*/
|
||||||
|
0x3bff, /*largest number less than one*/
|
||||||
|
0xc000, /* -2 */
|
||||||
|
0xfbff, /* -HALF_MAX */
|
||||||
|
0x8400, /* -HALF_MIN */
|
||||||
|
0x4248, /* M_PI_H */
|
||||||
|
0xc248, /* -M_PI_H */
|
||||||
|
0xbbff, /* Largest negative fraction */
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
|
||||||
|
|
||||||
|
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||||
|
{
|
||||||
|
TestInfo *job = (TestInfo *)data;
|
||||||
|
size_t buffer_elements = job->subBufferSize;
|
||||||
|
size_t buffer_size = buffer_elements * sizeof(cl_half);
|
||||||
|
cl_uint base = job_id * (cl_uint)job->step;
|
||||||
|
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||||
|
float ulps = job->ulps;
|
||||||
|
fptr func = job->f->func;
|
||||||
|
int ftz = job->ftz;
|
||||||
|
MTdata d = tinfo->d;
|
||||||
|
cl_int error;
|
||||||
|
const char *name = job->f->name;
|
||||||
|
|
||||||
|
int isFDim = job->isFDim;
|
||||||
|
int skipNanInf = job->skipNanInf;
|
||||||
|
int isNextafter = job->isNextafter;
|
||||||
|
cl_ushort *t;
|
||||||
|
cl_half *r;
|
||||||
|
std::vector<float> s(0), s2(0);
|
||||||
|
cl_uint j = 0;
|
||||||
|
|
||||||
|
RoundingMode oldRoundMode;
|
||||||
|
cl_int copysign_test = 0;
|
||||||
|
|
||||||
|
// start the map of the output arrays
|
||||||
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
|
cl_ushort *out[VECTOR_SIZE_COUNT];
|
||||||
|
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// start the map of the output arrays
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
|
buffer_size, 0, NULL, e + j, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init input array
|
||||||
|
cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
|
||||||
|
cl_ushort *p2 = (cl_ushort *)gIn2 + thread_id * buffer_elements;
|
||||||
|
j = 0;
|
||||||
|
int totalSpecialValueCount =
|
||||||
|
specialValuesHalfCount * specialValuesHalfCount;
|
||||||
|
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
|
if (job_id <= (cl_uint)indx)
|
||||||
|
{ // test edge cases
|
||||||
|
uint32_t x, y;
|
||||||
|
|
||||||
|
x = (job_id * buffer_elements) % specialValuesHalfCount;
|
||||||
|
y = (job_id * buffer_elements) / specialValuesHalfCount;
|
||||||
|
|
||||||
|
for (; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
p[j] = specialValuesHalf[x];
|
||||||
|
p2[j] = specialValuesHalf[y];
|
||||||
|
if (++x >= specialValuesHalfCount)
|
||||||
|
{
|
||||||
|
x = 0;
|
||||||
|
y++;
|
||||||
|
if (y >= specialValuesHalfCount) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init any remaining values.
|
||||||
|
for (; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
p[j] = (cl_ushort)genrand_int32(d);
|
||||||
|
p2[j] = (cl_ushort)genrand_int32(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
|
buffer_size, p, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
|
||||||
|
buffer_size, p2, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// Wait for the map to finish
|
||||||
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clReleaseEvent(e[j])))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill the result buffer with garbage, so that old results don't carry
|
||||||
|
// over
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(out[j], &pattern, buffer_size);
|
||||||
|
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
&pattern, sizeof(pattern), 0,
|
||||||
|
buffer_size, 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// run the kernel
|
||||||
|
size_t vectorCount =
|
||||||
|
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||||
|
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||||
|
// own copy of the cl_kernel
|
||||||
|
cl_program program = job->programs[j];
|
||||||
|
|
||||||
|
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||||
|
&tinfo->outBuf[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||||
|
&tinfo->inBuf)))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
|
||||||
|
&tinfo->inBuf2)))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||||
|
&vectorCount, NULL, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
FPU_mode_type oldMode;
|
||||||
|
oldRoundMode = kRoundToNearestEven;
|
||||||
|
if (isFDim)
|
||||||
|
{
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
memset(&oldMode, 0, sizeof(oldMode));
|
||||||
|
if (ftz) ForceFTZ(&oldMode);
|
||||||
|
|
||||||
|
// Set the rounding mode to match the device
|
||||||
|
if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!strcmp(name, "copysign")) copysign_test = 1;
|
||||||
|
|
||||||
|
#define ref_func(s, s2) (copysign_test ? func.f_ff_f(s, s2) : func.f_ff(s, s2))
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
|
||||||
|
t = (cl_ushort *)r;
|
||||||
|
s.resize(buffer_elements);
|
||||||
|
s2.resize(buffer_elements);
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
s[j] = cl_half_to_float(p[j]);
|
||||||
|
s2[j] = cl_half_to_float(p2[j]);
|
||||||
|
if (isNextafter)
|
||||||
|
r[j] = cl_half_from_float(reference_nextafterh(s[j], s2[j]),
|
||||||
|
CL_HALF_RTE);
|
||||||
|
else
|
||||||
|
r[j] = cl_half_from_float(ref_func(s[j], s2[j]), CL_HALF_RTE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isFDim && ftz) RestoreFPState(&oldMode);
|
||||||
|
// Read the data back -- no need to wait for the first N-1 buffers. This is
|
||||||
|
// an in order queue.
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
|
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
|
||||||
|
buffer_size, 0, NULL, NULL, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
|
{
|
||||||
|
cl_ushort *q = out[k];
|
||||||
|
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (t[j] != q[j])
|
||||||
|
{
|
||||||
|
double correct;
|
||||||
|
if (isNextafter)
|
||||||
|
correct = reference_nextafterh(s[j], s2[j]);
|
||||||
|
else
|
||||||
|
correct = ref_func(s[j], s2[j]);
|
||||||
|
|
||||||
|
float test = cl_half_to_float(q[j]);
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result if an input or
|
||||||
|
// output is a infinity or NaN or overflow
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
// Note: no double rounding here. Reference functions
|
||||||
|
// calculate in single precision.
|
||||||
|
if (IsFloatInfinity(correct) || IsFloatNaN(correct)
|
||||||
|
|| IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j])
|
||||||
|
|| IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
float err = Ulp_Error_Half(q[j], correct);
|
||||||
|
int fail = !(fabsf(err) <= ulps);
|
||||||
|
|
||||||
|
if (fail && ftz)
|
||||||
|
{
|
||||||
|
// retry per section 6.5.3.2
|
||||||
|
if (IsHalfResultSubnormal(correct, ulps))
|
||||||
|
{
|
||||||
|
if (isNextafter)
|
||||||
|
{
|
||||||
|
correct = reference_nextafterh(s[j], s2[j], false);
|
||||||
|
err = Ulp_Error_Half(q[j], correct);
|
||||||
|
fail = !(fabsf(err) <= ulps);
|
||||||
|
}
|
||||||
|
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IsHalfSubnormal(p[j]))
|
||||||
|
{
|
||||||
|
double correct2, correct3;
|
||||||
|
float err2, err3;
|
||||||
|
if (isNextafter)
|
||||||
|
{
|
||||||
|
correct2 = reference_nextafterh(0.0, s2[j]);
|
||||||
|
correct3 = reference_nextafterh(-0.0, s2[j]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
correct2 = ref_func(0.0, s2[j]);
|
||||||
|
correct3 = ref_func(-0.0, s2[j]);
|
||||||
|
}
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsFloatInfinity(correct2)
|
||||||
|
|| IsFloatNaN(correct2)
|
||||||
|
|| IsFloatInfinity(correct3)
|
||||||
|
|| IsFloatNaN(correct3))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto check_error = [&]() {
|
||||||
|
err2 = Ulp_Error_Half(q[j], correct2);
|
||||||
|
err3 = Ulp_Error_Half(q[j], correct3);
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= ulps))
|
||||||
|
&& (!(fabsf(err3) <= ulps)));
|
||||||
|
};
|
||||||
|
check_error();
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct2, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct3, ulps))
|
||||||
|
{
|
||||||
|
if (fail && isNextafter)
|
||||||
|
{
|
||||||
|
correct2 =
|
||||||
|
reference_nextafterh(0.0, s2[j], false);
|
||||||
|
correct3 =
|
||||||
|
reference_nextafterh(-0.0, s2[j], false);
|
||||||
|
check_error();
|
||||||
|
}
|
||||||
|
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// allow to omit denorm values for platforms with no
|
||||||
|
// denorm support for nextafter
|
||||||
|
if (fail && (isNextafter)
|
||||||
|
&& (correct <= cl_half_to_float(0x3FF))
|
||||||
|
&& (correct >= cl_half_to_float(0x83FF)))
|
||||||
|
{
|
||||||
|
fail = fail && (q[j] != p[j]);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// try with both args as zero
|
||||||
|
if (IsHalfSubnormal(p2[j]))
|
||||||
|
{
|
||||||
|
double correct4, correct5;
|
||||||
|
float err4, err5;
|
||||||
|
|
||||||
|
if (isNextafter)
|
||||||
|
{
|
||||||
|
correct2 = reference_nextafterh(0.0, 0.0);
|
||||||
|
correct3 = reference_nextafterh(-0.0, 0.0);
|
||||||
|
correct4 = reference_nextafterh(0.0, -0.0);
|
||||||
|
correct5 = reference_nextafterh(-0.0, -0.0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
correct2 = ref_func(0.0, 0.0);
|
||||||
|
correct3 = ref_func(-0.0, 0.0);
|
||||||
|
correct4 = ref_func(0.0, -0.0);
|
||||||
|
correct5 = ref_func(-0.0, -0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result if
|
||||||
|
// an input or output is a infinity or NaN or
|
||||||
|
// overflow
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsFloatInfinity(correct2)
|
||||||
|
|| IsFloatNaN(correct2)
|
||||||
|
|| IsFloatInfinity(correct3)
|
||||||
|
|| IsFloatNaN(correct3)
|
||||||
|
|| IsFloatInfinity(correct4)
|
||||||
|
|| IsFloatNaN(correct4)
|
||||||
|
|| IsFloatInfinity(correct5)
|
||||||
|
|| IsFloatNaN(correct5))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
err2 = Ulp_Error_Half(q[j], correct2);
|
||||||
|
err3 = Ulp_Error_Half(q[j], correct3);
|
||||||
|
err4 = Ulp_Error_Half(q[j], correct4);
|
||||||
|
err5 = Ulp_Error_Half(q[j], correct5);
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= ulps))
|
||||||
|
&& (!(fabsf(err3) <= ulps))
|
||||||
|
&& (!(fabsf(err4) <= ulps))
|
||||||
|
&& (!(fabsf(err5) <= ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||||
|
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct2, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct3, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct4, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct5, ulps))
|
||||||
|
{
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// allow to omit denorm values for platforms with no
|
||||||
|
// denorm support for nextafter
|
||||||
|
if (fail && (isNextafter)
|
||||||
|
&& (correct <= cl_half_to_float(0x3FF))
|
||||||
|
&& (correct >= cl_half_to_float(0x83FF)))
|
||||||
|
{
|
||||||
|
fail = fail && (q[j] != p2[j]);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IsHalfSubnormal(p2[j]))
|
||||||
|
{
|
||||||
|
double correct2, correct3;
|
||||||
|
float err2, err3;
|
||||||
|
|
||||||
|
if (isNextafter)
|
||||||
|
{
|
||||||
|
correct2 = reference_nextafterh(s[j], 0.0);
|
||||||
|
correct3 = reference_nextafterh(s[j], -0.0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
correct2 = ref_func(s[j], 0.0);
|
||||||
|
correct3 = ref_func(s[j], -0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsFloatInfinity(correct) || IsFloatNaN(correct)
|
||||||
|
|| IsFloatInfinity(correct2)
|
||||||
|
|| IsFloatNaN(correct2))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto check_error = [&]() {
|
||||||
|
err2 = Ulp_Error_Half(q[j], correct2);
|
||||||
|
err3 = Ulp_Error_Half(q[j], correct3);
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= ulps))
|
||||||
|
&& (!(fabsf(err3) <= ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
};
|
||||||
|
check_error();
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct2, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct3, ulps))
|
||||||
|
{
|
||||||
|
if (fail && isNextafter)
|
||||||
|
{
|
||||||
|
correct2 =
|
||||||
|
reference_nextafterh(s[j], 0.0, false);
|
||||||
|
correct3 =
|
||||||
|
reference_nextafterh(s[j], -0.0, false);
|
||||||
|
check_error();
|
||||||
|
}
|
||||||
|
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// allow to omit denorm values for platforms with no
|
||||||
|
// denorm support for nextafter
|
||||||
|
if (fail && (isNextafter)
|
||||||
|
&& (correct <= cl_half_to_float(0x3FF))
|
||||||
|
&& (correct >= cl_half_to_float(0x83FF)))
|
||||||
|
{
|
||||||
|
fail = fail && (q[j] != p2[j]);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fabsf(err) > tinfo->maxError)
|
||||||
|
{
|
||||||
|
tinfo->maxError = fabsf(err);
|
||||||
|
tinfo->maxErrorValue = s[j];
|
||||||
|
tinfo->maxErrorValue2 = s2[j];
|
||||||
|
}
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
vlog_error("\nERROR: %s%s: %f ulp error at {%a (0x%04x), "
|
||||||
|
"%a (0x%04x)}\nExpected: %a (half 0x%04x) "
|
||||||
|
"\nActual: %a (half 0x%04x) at index: %u\n",
|
||||||
|
name, sizeNames[k], err, s[j], p[j], s2[j],
|
||||||
|
p2[j], cl_half_to_float(r[j]), r[j], test, q[j],
|
||||||
|
j);
|
||||||
|
error = -1;
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isFDim && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||||
|
j, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||||
|
|
||||||
|
if (0 == (base & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
|
||||||
|
"ThreadCount:%2u\n",
|
||||||
|
base, job->step, job->scale, buffer_elements, job->ulps,
|
||||||
|
job->threadCount);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_Half_Half_Half_common(const Func *f, MTdata d, int isNextafter,
|
||||||
|
bool relaxedMode)
|
||||||
|
{
|
||||||
|
TestInfoBase test_info_base;
|
||||||
|
cl_int error;
|
||||||
|
float maxError = 0.0f;
|
||||||
|
double maxErrorVal = 0.0;
|
||||||
|
double maxErrorVal2 = 0.0;
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
// Init test_info
|
||||||
|
memset(&test_info_base, 0, sizeof(test_info_base));
|
||||||
|
TestInfo test_info(test_info_base);
|
||||||
|
|
||||||
|
test_info.threadCount = GetThreadCount();
|
||||||
|
test_info.subBufferSize = BUFFER_SIZE
|
||||||
|
/ (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||||
|
test_info.scale = getTestScale(sizeof(cl_half));
|
||||||
|
|
||||||
|
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||||
|
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||||
|
{
|
||||||
|
// there was overflow
|
||||||
|
test_info.jobCount = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||||
|
}
|
||||||
|
|
||||||
|
test_info.f = f;
|
||||||
|
test_info.ulps = f->half_ulps;
|
||||||
|
test_info.ftz =
|
||||||
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
|
||||||
|
test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
|
||||||
|
test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
|
||||||
|
test_info.isNextafter = isNextafter;
|
||||||
|
|
||||||
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
|
|
||||||
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
|
{
|
||||||
|
cl_buffer_region region = { i * test_info.subBufferSize
|
||||||
|
* sizeof(cl_half),
|
||||||
|
test_info.subBufferSize * sizeof(cl_half) };
|
||||||
|
test_info.tinfo[i].inBuf =
|
||||||
|
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||||
|
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||||
|
"region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
test_info.tinfo[i].inBuf2 =
|
||||||
|
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||||
|
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||||
|
"region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
|
®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"Error: Unable to create sub-buffer of gOutBuffer[%d] "
|
||||||
|
"for region {%zd, %zd}\n",
|
||||||
|
(int)j, region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
test_info.tinfo[i].tQueue =
|
||||||
|
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||||
|
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||||
|
{
|
||||||
|
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
{
|
||||||
|
BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
|
||||||
|
test_info.programs, f->nameInCode };
|
||||||
|
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info);
|
||||||
|
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||||
|
}
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
|
// Accumulate the arithmetic errors
|
||||||
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
|
{
|
||||||
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
|
{
|
||||||
|
maxError = test_info.tinfo[i].maxError;
|
||||||
|
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||||
|
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||||
|
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
|
||||||
|
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
int TestFunc_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
return TestFunc_Half_Half_Half_common(f, d, 0, relaxedMode);
|
||||||
|
}
|
||||||
|
|
||||||
|
int TestFunc_Half_Half_Half_nextafter(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
return TestFunc_Half_Half_Half_common(f, d, 1, relaxedMode);
|
||||||
|
}
|
||||||
548
test_conformance/math_brute_force/binary_i_half.cpp
Normal file
548
test_conformance/math_brute_force/binary_i_half.cpp
Normal file
@@ -0,0 +1,548 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <climits>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||||
|
ParameterType::Half, ParameterType::Int,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Thread specific data for a worker thread
|
||||||
|
typedef struct ThreadInfo
|
||||||
|
{
|
||||||
|
clMemWrapper inBuf; // input buffer for the thread
|
||||||
|
clMemWrapper inBuf2; // input buffer for the thread
|
||||||
|
clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||||
|
float maxError; // max error value. Init to 0.
|
||||||
|
double
|
||||||
|
maxErrorValue; // position of the max error value (param 1). Init to 0.
|
||||||
|
cl_int maxErrorValue2; // position of the max error value (param 2). Init
|
||||||
|
// to 0.
|
||||||
|
MTdataHolder d;
|
||||||
|
clCommandQueueWrapper
|
||||||
|
tQueue; // per thread command queue to improve performance
|
||||||
|
} ThreadInfo;
|
||||||
|
|
||||||
|
struct TestInfoBase
|
||||||
|
{
|
||||||
|
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||||
|
const Func *f; // A pointer to the function info
|
||||||
|
|
||||||
|
cl_uint threadCount; // Number of worker threads
|
||||||
|
cl_uint jobCount; // Number of jobs
|
||||||
|
cl_uint step; // step between each chunk and the next.
|
||||||
|
cl_uint scale; // stride between individual test values
|
||||||
|
float ulps; // max_allowed ulps
|
||||||
|
int ftz; // non-zero if running in flush to zero mode
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TestInfo : public TestInfoBase
|
||||||
|
{
|
||||||
|
TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
|
||||||
|
|
||||||
|
// Array of thread specific information
|
||||||
|
std::vector<ThreadInfo> tinfo;
|
||||||
|
|
||||||
|
// Programs for various vector sizes.
|
||||||
|
Programs programs;
|
||||||
|
|
||||||
|
// Thread-specific kernels for each vector size:
|
||||||
|
// k[vector_size][thread_id]
|
||||||
|
KernelMatrix k;
|
||||||
|
};
|
||||||
|
|
||||||
|
// A table of more difficult cases to get right
|
||||||
|
const cl_half specialValuesHalf[] = {
|
||||||
|
0xffff, 0x0000, 0x0001, 0x7c00, /*INFINITY*/
|
||||||
|
0xfc00, /*-INFINITY*/
|
||||||
|
0x8000, /*-0*/
|
||||||
|
0x7bff, /*HALF_MAX*/
|
||||||
|
0x0400, /*HALF_MIN*/
|
||||||
|
0x03ff, /* Largest denormal */
|
||||||
|
0x3c00, /* 1 */
|
||||||
|
0xbc00, /* -1 */
|
||||||
|
0x3555, /*nearest value to 1/3*/
|
||||||
|
0x3bff, /*largest number less than one*/
|
||||||
|
0xc000, /* -2 */
|
||||||
|
0xfbff, /* -HALF_MAX */
|
||||||
|
0x8400, /* -HALF_MIN */
|
||||||
|
0x4248, /* M_PI_H */
|
||||||
|
0xc248, /* -M_PI_H */
|
||||||
|
0xbbff, /* Largest negative fraction */
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
|
||||||
|
|
||||||
|
const int specialValuesInt3[] = { 0, 1, 2, 3, 1022, 1023,
|
||||||
|
1024, INT_MIN, INT_MAX, -1, -2, -3,
|
||||||
|
-1022, -1023, -11024, -INT_MAX };
|
||||||
|
size_t specialValuesInt3Count = ARRAY_SIZE(specialValuesInt3);
|
||||||
|
|
||||||
|
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||||
|
{
|
||||||
|
TestInfo *job = (TestInfo *)data;
|
||||||
|
size_t buffer_elements = job->subBufferSize;
|
||||||
|
cl_uint base = job_id * (cl_uint)job->step;
|
||||||
|
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||||
|
float ulps = job->ulps;
|
||||||
|
fptr func = job->f->func;
|
||||||
|
int ftz = job->ftz;
|
||||||
|
MTdata d = tinfo->d;
|
||||||
|
cl_uint j, k;
|
||||||
|
cl_int error;
|
||||||
|
const char *name = job->f->name;
|
||||||
|
cl_ushort *t;
|
||||||
|
cl_half *r;
|
||||||
|
std::vector<float> s;
|
||||||
|
cl_int *s2;
|
||||||
|
|
||||||
|
// start the map of the output arrays
|
||||||
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
|
cl_ushort *out[VECTOR_SIZE_COUNT];
|
||||||
|
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// start the map of the output arrays
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
|
buffer_elements * sizeof(cl_ushort), 0, NULL, e + j, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init input array
|
||||||
|
cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
|
||||||
|
cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||||
|
j = 0;
|
||||||
|
int totalSpecialValueCount =
|
||||||
|
specialValuesHalfCount * specialValuesInt3Count;
|
||||||
|
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
if (job_id <= (cl_uint)indx)
|
||||||
|
{ // test edge cases
|
||||||
|
uint32_t x, y;
|
||||||
|
|
||||||
|
x = (job_id * buffer_elements) % specialValuesHalfCount;
|
||||||
|
y = (job_id * buffer_elements) / specialValuesHalfCount;
|
||||||
|
|
||||||
|
for (; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
p[j] = specialValuesHalf[x];
|
||||||
|
p2[j] = specialValuesInt3[y];
|
||||||
|
if (++x >= specialValuesHalfCount)
|
||||||
|
{
|
||||||
|
x = 0;
|
||||||
|
y++;
|
||||||
|
if (y >= specialValuesInt3Count) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init any remaining values.
|
||||||
|
for (; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
p[j] = (cl_ushort)genrand_int32(d);
|
||||||
|
p2[j] = genrand_int32(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
|
buffer_elements * sizeof(cl_half), p, 0,
|
||||||
|
NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
|
||||||
|
buffer_elements * sizeof(cl_int), p2, 0,
|
||||||
|
NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// Wait for the map to finish
|
||||||
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clReleaseEvent(e[j])))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill the result buffer with garbage, so that old results don't carry
|
||||||
|
// over
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(out[j], &pattern,
|
||||||
|
buffer_elements * sizeof(cl_half));
|
||||||
|
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], &pattern, sizeof(pattern), 0,
|
||||||
|
buffer_elements * sizeof(cl_half), 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// run the kernel
|
||||||
|
size_t vectorCount =
|
||||||
|
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||||
|
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||||
|
// own copy of the cl_kernel
|
||||||
|
cl_program program = job->programs[j];
|
||||||
|
|
||||||
|
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||||
|
&tinfo->outBuf[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||||
|
&tinfo->inBuf)))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
|
||||||
|
&tinfo->inBuf2)))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||||
|
&vectorCount, NULL, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting) return CL_SUCCESS;
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
|
||||||
|
t = (cl_ushort *)r;
|
||||||
|
s.resize(buffer_elements);
|
||||||
|
s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
s[j] = cl_half_to_float(p[j]);
|
||||||
|
r[j] = HFF(func.f_fi(s[j], s2[j]));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the data back -- no need to wait for the first N-1 buffers. This is
|
||||||
|
// an in order queue.
|
||||||
|
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
|
||||||
|
buffer_elements * sizeof(cl_ushort), 0, NULL, NULL, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the last buffer
|
||||||
|
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0,
|
||||||
|
buffer_elements * sizeof(cl_ushort), 0, NULL, NULL, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
|
{
|
||||||
|
cl_ushort *q = out[k];
|
||||||
|
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (t[j] != q[j])
|
||||||
|
{
|
||||||
|
float test = cl_half_to_float(q[j]);
|
||||||
|
double correct = func.f_fi(s[j], s2[j]);
|
||||||
|
float err = Ulp_Error_Half(q[j], correct);
|
||||||
|
int fail = !(fabsf(err) <= ulps);
|
||||||
|
|
||||||
|
if (fail && ftz)
|
||||||
|
{
|
||||||
|
// retry per section 6.5.3.2
|
||||||
|
if (IsHalfResultSubnormal(correct, ulps))
|
||||||
|
{
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// retry per section 6.5.3.3
|
||||||
|
if (IsHalfSubnormal(p[j]))
|
||||||
|
{
|
||||||
|
double correct2, correct3;
|
||||||
|
float err2, err3;
|
||||||
|
correct2 = func.f_fi(0.0, s2[j]);
|
||||||
|
correct3 = func.f_fi(-0.0, s2[j]);
|
||||||
|
err2 = Ulp_Error_Half(q[j], correct2);
|
||||||
|
err3 = Ulp_Error_Half(q[j], correct3);
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= ulps))
|
||||||
|
&& (!(fabsf(err3) <= ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct2, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct3, ulps))
|
||||||
|
{
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fabsf(err) > tinfo->maxError)
|
||||||
|
{
|
||||||
|
tinfo->maxError = fabsf(err);
|
||||||
|
tinfo->maxErrorValue = s[j];
|
||||||
|
tinfo->maxErrorValue2 = s2[j];
|
||||||
|
}
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
vlog_error("\nERROR: %s%s: %f ulp error at {%a (0x%04x), "
|
||||||
|
"%d}\nExpected: %a (half 0x%04x) \nActual: %a "
|
||||||
|
"(half 0x%04x) at index: %d\n",
|
||||||
|
name, sizeNames[k], err, s[j], p[j], s2[j],
|
||||||
|
cl_half_to_float(r[j]), r[j], test, q[j],
|
||||||
|
(cl_uint)j);
|
||||||
|
error = -1;
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||||
|
j, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||||
|
|
||||||
|
if (0 == (base & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
|
||||||
|
"ThreadCount:%2u\n",
|
||||||
|
base, job->step, job->scale, buffer_elements, job->ulps,
|
||||||
|
job->threadCount);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_Half_Half_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
TestInfoBase test_info_base;
|
||||||
|
cl_int error;
|
||||||
|
size_t i, j;
|
||||||
|
float maxError = 0.0f;
|
||||||
|
double maxErrorVal = 0.0;
|
||||||
|
cl_int maxErrorVal2 = 0;
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
|
||||||
|
// Init test_info
|
||||||
|
memset(&test_info_base, 0, sizeof(test_info_base));
|
||||||
|
TestInfo test_info(test_info_base);
|
||||||
|
|
||||||
|
test_info.threadCount = GetThreadCount();
|
||||||
|
test_info.subBufferSize = BUFFER_SIZE
|
||||||
|
/ (sizeof(cl_int) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||||
|
test_info.scale = getTestScale(sizeof(cl_half));
|
||||||
|
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||||
|
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||||
|
{
|
||||||
|
// there was overflow
|
||||||
|
test_info.jobCount = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||||
|
}
|
||||||
|
|
||||||
|
test_info.f = f;
|
||||||
|
test_info.ulps = f->half_ulps;
|
||||||
|
test_info.ftz =
|
||||||
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
|
||||||
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
|
|
||||||
|
for (i = 0; i < test_info.threadCount; i++)
|
||||||
|
{
|
||||||
|
cl_buffer_region region = { i * test_info.subBufferSize
|
||||||
|
* sizeof(cl_half),
|
||||||
|
test_info.subBufferSize * sizeof(cl_half) };
|
||||||
|
test_info.tinfo[i].inBuf =
|
||||||
|
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||||
|
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||||
|
"region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
cl_buffer_region region2 = { i * test_info.subBufferSize
|
||||||
|
* sizeof(cl_int),
|
||||||
|
test_info.subBufferSize * sizeof(cl_int) };
|
||||||
|
test_info.tinfo[i].inBuf2 =
|
||||||
|
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||||
|
CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||||
|
"region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
|
®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
|
||||||
|
"for region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
test_info.tinfo[i].tQueue =
|
||||||
|
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||||
|
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||||
|
{
|
||||||
|
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
{
|
||||||
|
BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
|
||||||
|
test_info.programs, f->nameInCode };
|
||||||
|
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info);
|
||||||
|
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the kernels
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
|
|
||||||
|
// Accumulate the arithmetic errors
|
||||||
|
for (i = 0; i < test_info.threadCount; i++)
|
||||||
|
{
|
||||||
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
|
{
|
||||||
|
maxError = test_info.tinfo[i].maxError;
|
||||||
|
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||||
|
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
|
||||||
|
vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
661
test_conformance/math_brute_force/binary_operator_half.cpp
Normal file
661
test_conformance/math_brute_force/binary_operator_half.cpp
Normal file
@@ -0,0 +1,661 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||||
|
ParameterType::Half, ParameterType::Half,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Thread specific data for a worker thread
|
||||||
|
struct ThreadInfo
|
||||||
|
{
|
||||||
|
// Input and output buffers for the thread
|
||||||
|
clMemWrapper inBuf;
|
||||||
|
clMemWrapper inBuf2;
|
||||||
|
Buffers outBuf;
|
||||||
|
|
||||||
|
// max error value. Init to 0.
|
||||||
|
float maxError;
|
||||||
|
// position of the max error value (param 1). Init to 0.
|
||||||
|
double maxErrorValue;
|
||||||
|
// position of the max error value (param 2). Init to 0.
|
||||||
|
double maxErrorValue2;
|
||||||
|
MTdataHolder d;
|
||||||
|
|
||||||
|
// Per thread command queue to improve performance
|
||||||
|
clCommandQueueWrapper tQueue;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TestInfo
|
||||||
|
{
|
||||||
|
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||||
|
const Func *f; // A pointer to the function info
|
||||||
|
|
||||||
|
// Programs for various vector sizes.
|
||||||
|
Programs programs;
|
||||||
|
|
||||||
|
// Thread-specific kernels for each vector size:
|
||||||
|
// k[vector_size][thread_id]
|
||||||
|
KernelMatrix k;
|
||||||
|
|
||||||
|
// Array of thread specific information
|
||||||
|
std::vector<ThreadInfo> tinfo;
|
||||||
|
|
||||||
|
cl_uint threadCount; // Number of worker threads
|
||||||
|
cl_uint jobCount; // Number of jobs
|
||||||
|
cl_uint step; // step between each chunk and the next.
|
||||||
|
cl_uint scale; // stride between individual test values
|
||||||
|
float ulps; // max_allowed ulps
|
||||||
|
int ftz; // non-zero if running in flush to zero mode
|
||||||
|
|
||||||
|
// no special fields
|
||||||
|
};
|
||||||
|
|
||||||
|
// A table of more difficult cases to get right
|
||||||
|
const cl_half specialValuesHalf[] = {
|
||||||
|
0xffff, 0x0000, 0x0001, 0x7c00, /*INFINITY*/
|
||||||
|
0xfc00, /*-INFINITY*/
|
||||||
|
0x8000, /*-0*/
|
||||||
|
0x7bff, /*HALF_MAX*/
|
||||||
|
0x0400, /*HALF_MIN*/
|
||||||
|
0x03ff, /* Largest denormal */
|
||||||
|
0x3c00, /* 1 */
|
||||||
|
0xbc00, /* -1 */
|
||||||
|
0x3555, /*nearest value to 1/3*/
|
||||||
|
0x3bff, /*largest number less than one*/
|
||||||
|
0xc000, /* -2 */
|
||||||
|
0xfbff, /* -HALF_MAX */
|
||||||
|
0x8400, /* -HALF_MIN */
|
||||||
|
0x4248, /* M_PI_H */
|
||||||
|
0xc248, /* -M_PI_H */
|
||||||
|
0xbbff, /* Largest negative fraction */
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
|
||||||
|
|
||||||
|
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||||
|
{
|
||||||
|
TestInfo *job = (TestInfo *)data;
|
||||||
|
size_t buffer_elements = job->subBufferSize;
|
||||||
|
size_t buffer_size = buffer_elements * sizeof(cl_half);
|
||||||
|
cl_uint base = job_id * (cl_uint)job->step;
|
||||||
|
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||||
|
float ulps = job->ulps;
|
||||||
|
fptr func = job->f->func;
|
||||||
|
int ftz = job->ftz;
|
||||||
|
MTdata d = tinfo->d;
|
||||||
|
cl_int error;
|
||||||
|
|
||||||
|
const char *name = job->f->name;
|
||||||
|
cl_half *r = 0;
|
||||||
|
std::vector<float> s(0), s2(0);
|
||||||
|
RoundingMode oldRoundMode;
|
||||||
|
|
||||||
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
|
cl_half *out[VECTOR_SIZE_COUNT];
|
||||||
|
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// start the map of the output arrays
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
|
buffer_size, 0, NULL, e + j, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init input array
|
||||||
|
cl_half *p = (cl_half *)gIn + thread_id * buffer_elements;
|
||||||
|
cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements;
|
||||||
|
cl_uint idx = 0;
|
||||||
|
int totalSpecialValueCount =
|
||||||
|
specialValuesHalfCount * specialValuesHalfCount;
|
||||||
|
int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
|
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||||
|
{
|
||||||
|
// Insert special values
|
||||||
|
uint32_t x, y;
|
||||||
|
|
||||||
|
x = (job_id * buffer_elements) % specialValuesHalfCount;
|
||||||
|
y = (job_id * buffer_elements) / specialValuesHalfCount;
|
||||||
|
|
||||||
|
for (; idx < buffer_elements; idx++)
|
||||||
|
{
|
||||||
|
p[idx] = specialValuesHalf[x];
|
||||||
|
p2[idx] = specialValuesHalf[y];
|
||||||
|
if (++x >= specialValuesHalfCount)
|
||||||
|
{
|
||||||
|
x = 0;
|
||||||
|
y++;
|
||||||
|
if (y >= specialValuesHalfCount) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init any remaining values
|
||||||
|
for (; idx < buffer_elements; idx++)
|
||||||
|
{
|
||||||
|
p[idx] = (cl_half)genrand_int32(d);
|
||||||
|
p2[idx] = (cl_half)genrand_int32(d);
|
||||||
|
}
|
||||||
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
|
buffer_size, p, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
|
||||||
|
buffer_size, p2, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// Wait for the map to finish
|
||||||
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clReleaseEvent(e[j])))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill the result buffer with garbage, so that old results don't carry
|
||||||
|
// over
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(out[j], &pattern, buffer_size);
|
||||||
|
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
&pattern, sizeof(pattern), 0,
|
||||||
|
buffer_size, 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the kernel
|
||||||
|
size_t vectorCount =
|
||||||
|
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||||
|
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||||
|
// own copy of the cl_kernel
|
||||||
|
cl_program program = job->programs[j];
|
||||||
|
|
||||||
|
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||||
|
&tinfo->outBuf[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||||
|
&tinfo->inBuf)))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
|
||||||
|
&tinfo->inBuf2)))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||||
|
&vectorCount, NULL, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
FPU_mode_type oldMode;
|
||||||
|
memset(&oldMode, 0, sizeof(oldMode));
|
||||||
|
if (ftz) ForceFTZ(&oldMode);
|
||||||
|
|
||||||
|
// Set the rounding mode to match the device
|
||||||
|
oldRoundMode = kRoundToNearestEven;
|
||||||
|
if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
|
||||||
|
s.resize(buffer_elements);
|
||||||
|
s2.resize(buffer_elements);
|
||||||
|
|
||||||
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
s[j] = HTF(p[j]);
|
||||||
|
s2[j] = HTF(p2[j]);
|
||||||
|
r[j] = HFF(func.f_ff(s[j], s2[j]));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ftz) RestoreFPState(&oldMode);
|
||||||
|
|
||||||
|
// Read the data back -- no need to wait for the first N-1 buffers but wait
|
||||||
|
// for the last buffer. This is an in order queue.
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
|
out[j] = (cl_ushort *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
|
||||||
|
buffer_size, 0, NULL, NULL, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
|
||||||
|
for (size_t j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
|
{
|
||||||
|
cl_half *q = out[k];
|
||||||
|
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (r[j] != q[j])
|
||||||
|
{
|
||||||
|
float test = HTF(q[j]);
|
||||||
|
float correct = func.f_ff(s[j], s2[j]);
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result if an input or
|
||||||
|
// output is a infinity or NaN or overflow
|
||||||
|
if (!gInfNanSupport)
|
||||||
|
{
|
||||||
|
// Note: no double rounding here. Reference functions
|
||||||
|
// calculate in single precision.
|
||||||
|
if (IsFloatInfinity(correct) || IsFloatNaN(correct)
|
||||||
|
|| IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j])
|
||||||
|
|| IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
float err = Ulp_Error_Half(q[j], correct);
|
||||||
|
|
||||||
|
int fail = !(fabsf(err) <= ulps);
|
||||||
|
|
||||||
|
if (fail && ftz)
|
||||||
|
{
|
||||||
|
// retry per section 6.5.3.2
|
||||||
|
if (IsHalfResultSubnormal(correct, ulps))
|
||||||
|
{
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// retry per section 6.5.3.3
|
||||||
|
if (IsHalfSubnormal(p[j]))
|
||||||
|
{
|
||||||
|
double correct2, correct3;
|
||||||
|
float err2, err3;
|
||||||
|
|
||||||
|
correct2 = func.f_ff(0.0, s2[j]);
|
||||||
|
correct3 = func.f_ff(-0.0, s2[j]);
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result if an
|
||||||
|
// input or output is a infinity or NaN or overflow
|
||||||
|
if (!gInfNanSupport)
|
||||||
|
{
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsFloatInfinity(correct2)
|
||||||
|
|| IsFloatNaN(correct2)
|
||||||
|
|| IsFloatInfinity(correct3)
|
||||||
|
|| IsFloatNaN(correct3))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
err2 = Ulp_Error_Half(q[j], correct2);
|
||||||
|
err3 = Ulp_Error_Half(q[j], correct3);
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= ulps))
|
||||||
|
&& (!(fabsf(err3) <= ulps)));
|
||||||
|
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct2, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct3, ulps))
|
||||||
|
{
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// try with both args as zero
|
||||||
|
if (IsHalfSubnormal(p2[j]))
|
||||||
|
{
|
||||||
|
double correct4, correct5;
|
||||||
|
float err4, err5;
|
||||||
|
|
||||||
|
correct2 = func.f_ff(0.0, 0.0);
|
||||||
|
correct3 = func.f_ff(-0.0, 0.0);
|
||||||
|
correct4 = func.f_ff(0.0, -0.0);
|
||||||
|
correct5 = func.f_ff(-0.0, -0.0);
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result if
|
||||||
|
// an input or output is a infinity or NaN or
|
||||||
|
// overflow
|
||||||
|
if (!gInfNanSupport)
|
||||||
|
{
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsFloatInfinity(correct2)
|
||||||
|
|| IsFloatNaN(correct2)
|
||||||
|
|| IsFloatInfinity(correct3)
|
||||||
|
|| IsFloatNaN(correct3)
|
||||||
|
|| IsFloatInfinity(correct4)
|
||||||
|
|| IsFloatNaN(correct4)
|
||||||
|
|| IsFloatInfinity(correct5)
|
||||||
|
|| IsFloatNaN(correct5))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
err2 = Ulp_Error_Half(q[j], correct2);
|
||||||
|
err3 = Ulp_Error_Half(q[j], correct3);
|
||||||
|
err4 = Ulp_Error_Half(q[j], correct4);
|
||||||
|
err5 = Ulp_Error_Half(q[j], correct5);
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= ulps))
|
||||||
|
&& (!(fabsf(err3) <= ulps))
|
||||||
|
&& (!(fabsf(err4) <= ulps))
|
||||||
|
&& (!(fabsf(err5) <= ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||||
|
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct2, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct3, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct4, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct5, ulps))
|
||||||
|
{
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IsHalfSubnormal(p2[j]))
|
||||||
|
{
|
||||||
|
double correct2, correct3;
|
||||||
|
float err2, err3;
|
||||||
|
|
||||||
|
correct2 = func.f_ff(s[j], 0.0);
|
||||||
|
correct3 = func.f_ff(s[j], -0.0);
|
||||||
|
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result if an
|
||||||
|
// input or output is a infinity or NaN or overflow
|
||||||
|
if (!gInfNanSupport)
|
||||||
|
{
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsFloatInfinity(correct) || IsFloatNaN(correct)
|
||||||
|
|| IsFloatInfinity(correct2)
|
||||||
|
|| IsFloatNaN(correct2))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
err2 = Ulp_Error_Half(q[j], correct2);
|
||||||
|
err3 = Ulp_Error_Half(q[j], correct3);
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= ulps))
|
||||||
|
&& (!(fabsf(err3) <= ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct2, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct3, ulps))
|
||||||
|
{
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fabsf(err) > tinfo->maxError)
|
||||||
|
{
|
||||||
|
tinfo->maxError = fabsf(err);
|
||||||
|
tinfo->maxErrorValue = s[j];
|
||||||
|
tinfo->maxErrorValue2 = s2[j];
|
||||||
|
}
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
vlog_error("\nERROR: %s%s: %f ulp error at {%a (0x%04x), "
|
||||||
|
"%a (0x%04x)}\nExpected: %a (half 0x%04x) "
|
||||||
|
"\nActual: %a (half 0x%04x) at index: %zu\n",
|
||||||
|
name, sizeNames[k], err, s[j], p[j], s2[j],
|
||||||
|
p2[j], HTF(r[j]), r[j], test, q[j], j);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
||||||
|
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||||
|
j, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||||
|
|
||||||
|
|
||||||
|
if (0 == (base & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
|
||||||
|
"ThreadCount:%2u\n",
|
||||||
|
base, job->step, job->scale, buffer_elements, job->ulps,
|
||||||
|
job->threadCount);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_Half_Half_Half_Operator(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
TestInfo test_info{};
|
||||||
|
cl_int error;
|
||||||
|
float maxError = 0.0f;
|
||||||
|
double maxErrorVal = 0.0;
|
||||||
|
double maxErrorVal2 = 0.0;
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
|
||||||
|
// Init test_info
|
||||||
|
test_info.threadCount = GetThreadCount();
|
||||||
|
test_info.subBufferSize = BUFFER_SIZE
|
||||||
|
/ (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||||
|
test_info.scale = getTestScale(sizeof(cl_half));
|
||||||
|
|
||||||
|
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||||
|
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||||
|
{
|
||||||
|
// there was overflow
|
||||||
|
test_info.jobCount = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||||
|
}
|
||||||
|
|
||||||
|
test_info.f = f;
|
||||||
|
test_info.ulps = f->half_ulps;
|
||||||
|
test_info.ftz =
|
||||||
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
|
||||||
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
|
{
|
||||||
|
cl_buffer_region region = { i * test_info.subBufferSize
|
||||||
|
* sizeof(cl_half),
|
||||||
|
test_info.subBufferSize * sizeof(cl_half) };
|
||||||
|
test_info.tinfo[i].inBuf =
|
||||||
|
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||||
|
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||||
|
"region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
test_info.tinfo[i].inBuf2 =
|
||||||
|
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||||
|
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||||
|
"region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
|
gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
|
®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of "
|
||||||
|
"gOutBuffer[%d] for region {%zd, %zd}\n",
|
||||||
|
(int)j, region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
test_info.tinfo[i].tQueue =
|
||||||
|
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||||
|
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||||
|
{
|
||||||
|
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
{
|
||||||
|
BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
|
||||||
|
test_info.programs, f->nameInCode };
|
||||||
|
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info);
|
||||||
|
|
||||||
|
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||||
|
}
|
||||||
|
// Run the kernels
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
|
// Accumulate the arithmetic errors
|
||||||
|
for (cl_uint i = 0; i < test_info.threadCount; i++)
|
||||||
|
{
|
||||||
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
|
{
|
||||||
|
maxError = test_info.tinfo[i].maxError;
|
||||||
|
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||||
|
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||||
|
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
|
||||||
|
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
477
test_conformance/math_brute_force/binary_two_results_i_half.cpp
Normal file
477
test_conformance/math_brute_force/binary_two_results_i_half.cpp
Normal file
@@ -0,0 +1,477 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <cinttypes>
|
||||||
|
#include <climits>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||||
|
ParameterType::Int, ParameterType::Half,
|
||||||
|
ParameterType::Half, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ComputeReferenceInfoF
|
||||||
|
{
|
||||||
|
const cl_half *x;
|
||||||
|
const cl_half *y;
|
||||||
|
cl_half *r;
|
||||||
|
int32_t *i;
|
||||||
|
double (*f_ffpI)(double, double, int *);
|
||||||
|
cl_uint lim;
|
||||||
|
cl_uint count;
|
||||||
|
};
|
||||||
|
|
||||||
|
cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
|
||||||
|
{
|
||||||
|
ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
|
||||||
|
cl_uint lim = cri->lim;
|
||||||
|
cl_uint count = cri->count;
|
||||||
|
cl_uint off = jid * count;
|
||||||
|
const cl_half *x = cri->x + off;
|
||||||
|
const cl_half *y = cri->y + off;
|
||||||
|
cl_half *r = cri->r + off;
|
||||||
|
int32_t *i = cri->i + off;
|
||||||
|
double (*f)(double, double, int *) = cri->f_ffpI;
|
||||||
|
|
||||||
|
if (off + count > lim) count = lim - off;
|
||||||
|
|
||||||
|
for (cl_uint j = 0; j < count; ++j)
|
||||||
|
r[j] = HFF((float)f((double)HTF(x[j]), (double)HTF(y[j]), i + j));
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
|
||||||
|
Programs programs;
|
||||||
|
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||||
|
KernelMatrix kernels;
|
||||||
|
float maxError = 0.0f;
|
||||||
|
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
int64_t maxError2 = 0;
|
||||||
|
float maxErrorVal = 0.0f;
|
||||||
|
float maxErrorVal2 = 0.0f;
|
||||||
|
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||||
|
|
||||||
|
// use larger type of output data to prevent overflowing buffer size
|
||||||
|
constexpr size_t buffer_size = BUFFER_SIZE / sizeof(int32_t);
|
||||||
|
|
||||||
|
cl_uint threadCount = GetThreadCount();
|
||||||
|
|
||||||
|
float half_ulps = f->half_ulps;
|
||||||
|
|
||||||
|
int testingRemquo = !strcmp(f->name, "remquo");
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
|
||||||
|
if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info)))
|
||||||
|
return error;
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
|
{
|
||||||
|
// Init input array
|
||||||
|
cl_half *p = (cl_half *)gIn;
|
||||||
|
cl_half *p2 = (cl_half *)gIn2;
|
||||||
|
for (size_t j = 0; j < buffer_size; j++)
|
||||||
|
{
|
||||||
|
p[j] = (cl_half)genrand_int32(d);
|
||||||
|
p2[j] = (cl_half)genrand_int32(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
buffer_size * sizeof(cl_half), gIn, 0,
|
||||||
|
NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||||
|
buffer_size * sizeof(cl_half), gIn2,
|
||||||
|
0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write garbage into output arrays
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||||
|
CL_FALSE, 0, BUFFER_SIZE,
|
||||||
|
gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||||
|
error, j);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
|
||||||
|
CL_FALSE, 0, BUFFER_SIZE,
|
||||||
|
gOut2[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
|
||||||
|
error, j);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||||
|
sizeof(pattern), 0, BUFFER_SIZE, 0,
|
||||||
|
NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer 1 failed!\n");
|
||||||
|
|
||||||
|
error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j], &pattern,
|
||||||
|
sizeof(pattern), 0, BUFFER_SIZE, 0,
|
||||||
|
NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer 2 failed!\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the kernels
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
// align working group size with the bigger output type
|
||||||
|
size_t vectorSize = sizeValues[j] * sizeof(int32_t);
|
||||||
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||||
|
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error =
|
||||||
|
clSetKernelArg(kernels[j][thread_id], 1,
|
||||||
|
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
|
||||||
|
sizeof(gInBuffer), &gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
|
||||||
|
sizeof(gInBuffer2), &gInBuffer2)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||||
|
1, NULL, &localCount, NULL, 0,
|
||||||
|
NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
||||||
|
|
||||||
|
if (threadCount > 1)
|
||||||
|
{
|
||||||
|
ComputeReferenceInfoF cri;
|
||||||
|
cri.x = p;
|
||||||
|
cri.y = p2;
|
||||||
|
cri.r = (cl_half *)gOut_Ref;
|
||||||
|
cri.i = (int32_t *)gOut_Ref2;
|
||||||
|
cri.f_ffpI = f->func.f_ffpI;
|
||||||
|
cri.lim = buffer_size;
|
||||||
|
cri.count = (cri.lim + threadCount - 1) / threadCount;
|
||||||
|
ThreadPool_Do(ReferenceF, threadCount, &cri);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cl_half *r = (cl_half *)gOut_Ref;
|
||||||
|
int32_t *r2 = (int32_t *)gOut_Ref2;
|
||||||
|
for (size_t j = 0; j < buffer_size; j++)
|
||||||
|
r[j] =
|
||||||
|
HFF((float)f->func.f_ffpI(HTF(p[j]), HTF(p2[j]), r2 + j));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the data back
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
cl_bool blocking =
|
||||||
|
(j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
|
if ((error =
|
||||||
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], blocking, 0,
|
||||||
|
BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("ReadArray failed %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error =
|
||||||
|
clEnqueueReadBuffer(gQueue, gOutBuffer2[j], blocking, 0,
|
||||||
|
BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("ReadArray2 failed %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting) break;
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
cl_half *t = (cl_half *)gOut_Ref;
|
||||||
|
int32_t *t2 = (int32_t *)gOut_Ref2;
|
||||||
|
for (size_t j = 0; j < buffer_size; j++)
|
||||||
|
{
|
||||||
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
|
{
|
||||||
|
cl_half *q = (cl_half *)(gOut[k]);
|
||||||
|
int32_t *q2 = (int32_t *)gOut2[k];
|
||||||
|
|
||||||
|
// Check for exact match to correctly rounded result
|
||||||
|
if (t[j] == q[j] && t2[j] == q2[j]) continue;
|
||||||
|
|
||||||
|
// Check for paired NaNs
|
||||||
|
if (IsHalfNaN(t[j]) && IsHalfNaN(q[j]) && t2[j] == q2[j])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
cl_half test = ((cl_half *)q)[j];
|
||||||
|
int correct2 = INT_MIN;
|
||||||
|
float correct =
|
||||||
|
(float)f->func.f_ffpI(HTF(p[j]), HTF(p2[j]), &correct2);
|
||||||
|
float err = Ulp_Error_Half(test, correct);
|
||||||
|
int64_t iErr;
|
||||||
|
|
||||||
|
// in case of remquo, we only care about the sign and last
|
||||||
|
// seven bits of integer as per the spec.
|
||||||
|
if (testingRemquo)
|
||||||
|
iErr = (long long)(q2[j] & 0x0000007f)
|
||||||
|
- (long long)(correct2 & 0x0000007f);
|
||||||
|
else
|
||||||
|
iErr = (long long)q2[j] - (long long)correct2;
|
||||||
|
|
||||||
|
// For remquo, if y = 0, x is infinite, or either is NaN
|
||||||
|
// then the standard either neglects to say what is returned
|
||||||
|
// in iptr or leaves it undefined or implementation defined.
|
||||||
|
int iptrUndefined = IsHalfInfinity(p[j]) || (HTF(p2[j]) == 0.0f)
|
||||||
|
|| IsHalfNaN(p2[j]) || IsHalfNaN(p[j]);
|
||||||
|
if (iptrUndefined) iErr = 0;
|
||||||
|
|
||||||
|
int fail = !(fabsf(err) <= half_ulps && iErr == 0);
|
||||||
|
if (ftz && fail)
|
||||||
|
{
|
||||||
|
// retry per section 6.5.3.2
|
||||||
|
if (IsHalfResultSubnormal(correct, half_ulps))
|
||||||
|
{
|
||||||
|
fail = fail && !(test == 0.0f && iErr == 0);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// retry per section 6.5.3.3
|
||||||
|
if (IsHalfSubnormal(p[j]))
|
||||||
|
{
|
||||||
|
int correct3i, correct4i;
|
||||||
|
float correct3 =
|
||||||
|
(float)f->func.f_ffpI(0.0, HTF(p2[j]), &correct3i);
|
||||||
|
float correct4 =
|
||||||
|
(float)f->func.f_ffpI(-0.0, HTF(p2[j]), &correct4i);
|
||||||
|
float err2 = Ulp_Error_Half(test, correct3);
|
||||||
|
float err3 = Ulp_Error_Half(test, correct4);
|
||||||
|
int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
|
||||||
|
int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= half_ulps && iErr3 == 0))
|
||||||
|
&& (!(fabsf(err3) <= half_ulps && iErr4 == 0)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
|
||||||
|
if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct2, half_ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct3, half_ulps))
|
||||||
|
{
|
||||||
|
fail = fail
|
||||||
|
&& !(test == 0.0f
|
||||||
|
&& (iErr3 == 0 || iErr4 == 0));
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// try with both args as zero
|
||||||
|
if (IsHalfSubnormal(p2[j]))
|
||||||
|
{
|
||||||
|
int correct7i, correct8i;
|
||||||
|
correct3 = f->func.f_ffpI(0.0, 0.0, &correct3i);
|
||||||
|
correct4 = f->func.f_ffpI(-0.0, 0.0, &correct4i);
|
||||||
|
double correct7 =
|
||||||
|
f->func.f_ffpI(0.0, -0.0, &correct7i);
|
||||||
|
double correct8 =
|
||||||
|
f->func.f_ffpI(-0.0, -0.0, &correct8i);
|
||||||
|
err2 = Ulp_Error_Half(test, correct3);
|
||||||
|
err3 = Ulp_Error_Half(test, correct4);
|
||||||
|
float err4 = Ulp_Error_Half(test, correct7);
|
||||||
|
float err5 = Ulp_Error_Half(test, correct8);
|
||||||
|
iErr3 = (long long)q2[j] - (long long)correct3i;
|
||||||
|
iErr4 = (long long)q2[j] - (long long)correct4i;
|
||||||
|
int64_t iErr7 =
|
||||||
|
(long long)q2[j] - (long long)correct7i;
|
||||||
|
int64_t iErr8 =
|
||||||
|
(long long)q2[j] - (long long)correct8i;
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= half_ulps && iErr3 == 0))
|
||||||
|
&& (!(fabsf(err3) <= half_ulps
|
||||||
|
&& iErr4 == 0))
|
||||||
|
&& (!(fabsf(err4) <= half_ulps
|
||||||
|
&& iErr7 == 0))
|
||||||
|
&& (!(fabsf(err5) <= half_ulps
|
||||||
|
&& iErr8 == 0)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||||
|
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||||
|
if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
|
||||||
|
if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
|
||||||
|
if (llabs(iErr7) < llabs(iErr)) iErr = iErr7;
|
||||||
|
if (llabs(iErr8) < llabs(iErr)) iErr = iErr8;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct3, half_ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct4, half_ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct7, half_ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct8, half_ulps))
|
||||||
|
{
|
||||||
|
fail = fail
|
||||||
|
&& !(test == 0.0f
|
||||||
|
&& (iErr3 == 0 || iErr4 == 0
|
||||||
|
|| iErr7 == 0 || iErr8 == 0));
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IsHalfSubnormal(p2[j]))
|
||||||
|
{
|
||||||
|
int correct3i, correct4i;
|
||||||
|
double correct3 =
|
||||||
|
f->func.f_ffpI(HTF(p[j]), 0.0, &correct3i);
|
||||||
|
double correct4 =
|
||||||
|
f->func.f_ffpI(HTF(p[j]), -0.0, &correct4i);
|
||||||
|
float err2 = Ulp_Error_Half(test, correct3);
|
||||||
|
float err3 = Ulp_Error_Half(test, correct4);
|
||||||
|
int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
|
||||||
|
int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= half_ulps && iErr3 == 0))
|
||||||
|
&& (!(fabsf(err3) <= half_ulps && iErr4 == 0)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
|
||||||
|
if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct2, half_ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct3, half_ulps))
|
||||||
|
{
|
||||||
|
fail = fail
|
||||||
|
&& !(test == 0.0f
|
||||||
|
&& (iErr3 == 0 || iErr4 == 0));
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fabsf(err) > maxError)
|
||||||
|
{
|
||||||
|
maxError = fabsf(err);
|
||||||
|
maxErrorVal = HTF(p[j]);
|
||||||
|
}
|
||||||
|
if (llabs(iErr) > maxError2)
|
||||||
|
{
|
||||||
|
maxError2 = llabs(iErr);
|
||||||
|
maxErrorVal2 = HTF(p[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
vlog_error("\nERROR: %s%s: {%f, %" PRId64
|
||||||
|
"} ulp error at {%a, %a} "
|
||||||
|
"({0x%04x, 0x%04x}): *{%a, %d} ({0x%04x, "
|
||||||
|
"0x%8.8x}) vs. {%a, %d} ({0x%04x, 0x%8.8x})\n",
|
||||||
|
f->name, sizeNames[k], err, iErr, HTF(p[j]),
|
||||||
|
HTF(p2[j]), p[j], p2[j], HTF(t[j]), t2[j], t[j],
|
||||||
|
t2[j], HTF(test), q2[j], test, q2[j]);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == (i & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14" PRIu64 " step:%10" PRIu64
|
||||||
|
" bufferSize:%10d \n",
|
||||||
|
i, step, BUFFER_SIZE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
|
||||||
|
vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
|
||||||
|
maxErrorVal, maxErrorVal2);
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2022 The Khronos Group Inc.
|
// Copyright (c) 2022-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -27,8 +27,11 @@ const char *GetTypeName(ParameterType type)
|
|||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
|
case ParameterType::Half: return "half";
|
||||||
case ParameterType::Float: return "float";
|
case ParameterType::Float: return "float";
|
||||||
case ParameterType::Double: return "double";
|
case ParameterType::Double: return "double";
|
||||||
|
case ParameterType::Short: return "short";
|
||||||
|
case ParameterType::UShort: return "ushort";
|
||||||
case ParameterType::Int: return "int";
|
case ParameterType::Int: return "int";
|
||||||
case ParameterType::UInt: return "uint";
|
case ParameterType::UInt: return "uint";
|
||||||
case ParameterType::Long: return "long";
|
case ParameterType::Long: return "long";
|
||||||
@@ -41,9 +44,13 @@ const char *GetUndefValue(ParameterType type)
|
|||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
|
case ParameterType::Half:
|
||||||
case ParameterType::Float:
|
case ParameterType::Float:
|
||||||
case ParameterType::Double: return "NAN";
|
case ParameterType::Double: return "NAN";
|
||||||
|
|
||||||
|
case ParameterType::Short:
|
||||||
|
case ParameterType::UShort: return "0x5678";
|
||||||
|
|
||||||
case ParameterType::Int:
|
case ParameterType::Int:
|
||||||
case ParameterType::UInt: return "0x12345678";
|
case ParameterType::UInt: return "0x12345678";
|
||||||
|
|
||||||
@@ -71,14 +78,17 @@ void EmitEnableExtension(std::ostringstream &kernel,
|
|||||||
const std::initializer_list<ParameterType> &types)
|
const std::initializer_list<ParameterType> &types)
|
||||||
{
|
{
|
||||||
bool needsFp64 = false;
|
bool needsFp64 = false;
|
||||||
|
bool needsFp16 = false;
|
||||||
|
|
||||||
for (const auto &type : types)
|
for (const auto &type : types)
|
||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case ParameterType::Double: needsFp64 = true; break;
|
case ParameterType::Double: needsFp64 = true; break;
|
||||||
|
case ParameterType::Half: needsFp16 = true; break;
|
||||||
case ParameterType::Float:
|
case ParameterType::Float:
|
||||||
|
case ParameterType::Short:
|
||||||
|
case ParameterType::UShort:
|
||||||
case ParameterType::Int:
|
case ParameterType::Int:
|
||||||
case ParameterType::UInt:
|
case ParameterType::UInt:
|
||||||
case ParameterType::Long:
|
case ParameterType::Long:
|
||||||
@@ -89,6 +99,7 @@ void EmitEnableExtension(std::ostringstream &kernel,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (needsFp64) kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
if (needsFp64) kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||||
|
if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetBuildOptions(bool relaxed_mode)
|
std::string GetBuildOptions(bool relaxed_mode)
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2021 The Khronos Group Inc.
|
// Copyright (c) 2021-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -36,8 +36,11 @@ using Buffers = std::array<clMemWrapper, VECTOR_SIZE_COUNT>;
|
|||||||
// Types supported for kernel code generation.
|
// Types supported for kernel code generation.
|
||||||
enum class ParameterType
|
enum class ParameterType
|
||||||
{
|
{
|
||||||
|
Half,
|
||||||
Float,
|
Float,
|
||||||
Double,
|
Double,
|
||||||
|
Short,
|
||||||
|
UShort,
|
||||||
Int,
|
Int,
|
||||||
UInt,
|
UInt,
|
||||||
Long,
|
Long,
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -29,36 +29,41 @@
|
|||||||
// Only use ulps information in spir test
|
// Only use ulps information in spir test
|
||||||
#ifdef FUNCTION_LIST_ULPS_ONLY
|
#ifdef FUNCTION_LIST_ULPS_ONLY
|
||||||
|
|
||||||
#define ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type) \
|
#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type) \
|
||||||
{ \
|
{ \
|
||||||
STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL }, \
|
STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL }, \
|
||||||
_ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, \
|
_ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, \
|
||||||
RELAXED_OFF, _type \
|
RELAXED_OFF, _type \
|
||||||
}
|
}
|
||||||
#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type, \
|
#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
|
||||||
_relaxed_embedded_ulp) \
|
_type, _relaxed_embedded_ulp) \
|
||||||
{ \
|
{ \
|
||||||
STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL }, \
|
STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL }, \
|
||||||
_ulp, _ulp, _embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp, \
|
_ulp, _ulp, _half_ulp, _embedded_ulp, _relaxed_ulp, \
|
||||||
_rmode, RELAXED_ON, _type \
|
_relaxed_embedded_ulp, _rmode, RELAXED_ON, _type \
|
||||||
}
|
}
|
||||||
#define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type) \
|
#define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type) \
|
||||||
{ \
|
{ \
|
||||||
"half_" STRINGIFY(_name), "half_" STRINGIFY(_name), { NULL }, \
|
"half_" STRINGIFY(_name), "half_" STRINGIFY(_name), { NULL }, \
|
||||||
{ NULL }, { NULL }, _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, \
|
{ NULL }, { NULL }, _ulp, _ulp, _ulp, _embedded_ulp, INFINITY, \
|
||||||
_rmode, RELAXED_OFF, _type \
|
INFINITY, _rmode, RELAXED_OFF, _type \
|
||||||
}
|
}
|
||||||
#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type) \
|
#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp, \
|
||||||
|
_rmode, _type) \
|
||||||
{ \
|
{ \
|
||||||
STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
|
STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
|
||||||
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
_half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, \
|
||||||
|
_type \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define unaryF NULL
|
#define unaryF NULL
|
||||||
|
#define unaryOF NULL
|
||||||
#define i_unaryF NULL
|
#define i_unaryF NULL
|
||||||
#define unaryF_u NULL
|
#define unaryF_u NULL
|
||||||
#define macro_unaryF NULL
|
#define macro_unaryF NULL
|
||||||
#define binaryF NULL
|
#define binaryF NULL
|
||||||
|
#define binaryOF NULL
|
||||||
|
#define binaryF_nextafter NULL
|
||||||
#define binaryOperatorF NULL
|
#define binaryOperatorF NULL
|
||||||
#define binaryF_i NULL
|
#define binaryF_i NULL
|
||||||
#define macro_binaryF NULL
|
#define macro_binaryF NULL
|
||||||
@@ -76,31 +81,34 @@
|
|||||||
|
|
||||||
#else // FUNCTION_LIST_ULPS_ONLY
|
#else // FUNCTION_LIST_ULPS_ONLY
|
||||||
|
|
||||||
#define ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type) \
|
#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type) \
|
||||||
{ \
|
{ \
|
||||||
STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name }, \
|
STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name }, \
|
||||||
{ (void*)reference_##_name##l }, { (void*)reference_##_name }, \
|
{ (void*)reference_##_name##l }, { (void*)reference_##_name }, \
|
||||||
_ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, \
|
_ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, \
|
||||||
RELAXED_OFF, _type \
|
RELAXED_OFF, _type \
|
||||||
}
|
}
|
||||||
#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type, \
|
#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
|
||||||
_relaxed_embedded_ulp) \
|
_type, _relaxed_embedded_ulp) \
|
||||||
{ \
|
{ \
|
||||||
STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name }, \
|
STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name }, \
|
||||||
{ (void*)reference_##_name##l }, \
|
{ (void*)reference_##_name##l }, \
|
||||||
{ (void*)reference_##relaxed_##_name }, _ulp, _ulp, _embedded_ulp, \
|
{ (void*)reference_##relaxed_##_name }, _ulp, _ulp, _half_ulp, \
|
||||||
_relaxed_ulp, _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type \
|
_embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp, _rmode, \
|
||||||
|
RELAXED_ON, _type \
|
||||||
}
|
}
|
||||||
#define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type) \
|
#define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type) \
|
||||||
{ \
|
{ \
|
||||||
"half_" STRINGIFY(_name), "half_" STRINGIFY(_name), \
|
"half_" STRINGIFY(_name), "half_" STRINGIFY(_name), \
|
||||||
{ (void*)reference_##_name }, { NULL }, { NULL }, _ulp, _ulp, \
|
{ (void*)reference_##_name }, { NULL }, { NULL }, _ulp, _ulp, \
|
||||||
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, \
|
||||||
|
_type \
|
||||||
}
|
}
|
||||||
#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type) \
|
#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp, \
|
||||||
|
_rmode, _type) \
|
||||||
{ \
|
{ \
|
||||||
STRINGIFY(_name), _operator, { (void*)reference_##_name }, \
|
STRINGIFY(_name), _operator, { (void*)reference_##_name }, \
|
||||||
{ (void*)reference_##_name##l }, { NULL }, _ulp, _ulp, \
|
{ (void*)reference_##_name##l }, { NULL }, _ulp, _ulp, _half_ulp, \
|
||||||
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -108,85 +116,114 @@ static constexpr vtbl _unary = {
|
|||||||
"unary",
|
"unary",
|
||||||
TestFunc_Float_Float,
|
TestFunc_Float_Float,
|
||||||
TestFunc_Double_Double,
|
TestFunc_Double_Double,
|
||||||
|
TestFunc_Half_Half,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static constexpr vtbl _unaryof = { "unaryof", TestFunc_Float_Float, NULL,
|
||||||
|
NULL };
|
||||||
|
|
||||||
static constexpr vtbl _i_unary = {
|
static constexpr vtbl _i_unary = {
|
||||||
"i_unary",
|
"i_unary",
|
||||||
TestFunc_Int_Float,
|
TestFunc_Int_Float,
|
||||||
TestFunc_Int_Double,
|
TestFunc_Int_Double,
|
||||||
|
TestFunc_Int_Half,
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr vtbl _unary_u = {
|
static constexpr vtbl _unary_u = {
|
||||||
"unary_u",
|
"unary_u",
|
||||||
TestFunc_Float_UInt,
|
TestFunc_Float_UInt,
|
||||||
TestFunc_Double_ULong,
|
TestFunc_Double_ULong,
|
||||||
|
TestFunc_Half_UShort,
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr vtbl _macro_unary = {
|
static constexpr vtbl _macro_unary = {
|
||||||
"macro_unary",
|
"macro_unary",
|
||||||
TestMacro_Int_Float,
|
TestMacro_Int_Float,
|
||||||
TestMacro_Int_Double,
|
TestMacro_Int_Double,
|
||||||
|
TestMacro_Int_Half,
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr vtbl _binary = {
|
static constexpr vtbl _binary = {
|
||||||
"binary",
|
"binary",
|
||||||
TestFunc_Float_Float_Float,
|
TestFunc_Float_Float_Float,
|
||||||
TestFunc_Double_Double_Double,
|
TestFunc_Double_Double_Double,
|
||||||
|
TestFunc_Half_Half_Half,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static constexpr vtbl _binary_nextafter = {
|
||||||
|
"binary",
|
||||||
|
TestFunc_Float_Float_Float,
|
||||||
|
TestFunc_Double_Double_Double,
|
||||||
|
TestFunc_Half_Half_Half_nextafter,
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr vtbl _binaryof = { "binaryof", TestFunc_Float_Float_Float,
|
||||||
|
NULL, NULL };
|
||||||
|
|
||||||
static constexpr vtbl _binary_operator = {
|
static constexpr vtbl _binary_operator = {
|
||||||
"binaryOperator",
|
"binaryOperator",
|
||||||
TestFunc_Float_Float_Float_Operator,
|
TestFunc_Float_Float_Float_Operator,
|
||||||
TestFunc_Double_Double_Double_Operator,
|
TestFunc_Double_Double_Double_Operator,
|
||||||
|
TestFunc_Half_Half_Half_Operator,
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr vtbl _binary_i = {
|
static constexpr vtbl _binary_i = {
|
||||||
"binary_i",
|
"binary_i",
|
||||||
TestFunc_Float_Float_Int,
|
TestFunc_Float_Float_Int,
|
||||||
TestFunc_Double_Double_Int,
|
TestFunc_Double_Double_Int,
|
||||||
|
TestFunc_Half_Half_Int,
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr vtbl _macro_binary = {
|
static constexpr vtbl _macro_binary = {
|
||||||
"macro_binary",
|
"macro_binary",
|
||||||
TestMacro_Int_Float_Float,
|
TestMacro_Int_Float_Float,
|
||||||
TestMacro_Int_Double_Double,
|
TestMacro_Int_Double_Double,
|
||||||
|
TestMacro_Int_Half_Half,
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr vtbl _ternary = {
|
static constexpr vtbl _ternary = {
|
||||||
"ternary",
|
"ternary",
|
||||||
TestFunc_Float_Float_Float_Float,
|
TestFunc_Float_Float_Float_Float,
|
||||||
TestFunc_Double_Double_Double_Double,
|
TestFunc_Double_Double_Double_Double,
|
||||||
|
TestFunc_Half_Half_Half_Half,
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr vtbl _unary_two_results = {
|
static constexpr vtbl _unary_two_results = {
|
||||||
"unary_two_results",
|
"unary_two_results",
|
||||||
TestFunc_Float2_Float,
|
TestFunc_Float2_Float,
|
||||||
TestFunc_Double2_Double,
|
TestFunc_Double2_Double,
|
||||||
|
TestFunc_Half2_Half,
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr vtbl _unary_two_results_i = {
|
static constexpr vtbl _unary_two_results_i = {
|
||||||
"unary_two_results_i",
|
"unary_two_results_i",
|
||||||
TestFunc_FloatI_Float,
|
TestFunc_FloatI_Float,
|
||||||
TestFunc_DoubleI_Double,
|
TestFunc_DoubleI_Double,
|
||||||
|
TestFunc_HalfI_Half,
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr vtbl _binary_two_results_i = {
|
static constexpr vtbl _binary_two_results_i = {
|
||||||
"binary_two_results_i",
|
"binary_two_results_i",
|
||||||
TestFunc_FloatI_Float_Float,
|
TestFunc_FloatI_Float_Float,
|
||||||
TestFunc_DoubleI_Double_Double,
|
TestFunc_DoubleI_Double_Double,
|
||||||
|
TestFunc_HalfI_Half_Half,
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr vtbl _mad_tbl = {
|
static constexpr vtbl _mad_tbl = {
|
||||||
"ternary",
|
"ternary",
|
||||||
TestFunc_mad_Float,
|
TestFunc_mad_Float,
|
||||||
TestFunc_mad_Double,
|
TestFunc_mad_Double,
|
||||||
|
TestFunc_mad_Half,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define unaryF &_unary
|
#define unaryF &_unary
|
||||||
|
#define unaryOF &_unaryof
|
||||||
#define i_unaryF &_i_unary
|
#define i_unaryF &_i_unary
|
||||||
#define unaryF_u &_unary_u
|
#define unaryF_u &_unary_u
|
||||||
#define macro_unaryF &_macro_unary
|
#define macro_unaryF &_macro_unary
|
||||||
#define binaryF &_binary
|
#define binaryF &_binary
|
||||||
|
#define binaryF_nextafter &_binary_nextafter
|
||||||
|
#define binaryOF &_binaryof
|
||||||
#define binaryOperatorF &_binary_operator
|
#define binaryOperatorF &_binary_operator
|
||||||
#define binaryF_i &_binary_i
|
#define binaryF_i &_binary_i
|
||||||
#define macro_binaryF &_macro_binary
|
#define macro_binaryF &_macro_binary
|
||||||
@@ -198,25 +235,26 @@ static constexpr vtbl _mad_tbl = {
|
|||||||
|
|
||||||
#endif // FUNCTION_LIST_ULPS_ONLY
|
#endif // FUNCTION_LIST_ULPS_ONLY
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
const Func functionList[] = {
|
const Func functionList[] = {
|
||||||
ENTRY_EXT(acos, 4.0f, 4.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
ENTRY_EXT(acos, 4.0f, 4.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
||||||
ENTRY(acosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
ENTRY(acosh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(acospi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
ENTRY(acospi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY_EXT(asin, 4.0f, 4.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
ENTRY_EXT(asin, 4.0f, 4.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
||||||
ENTRY(asinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
ENTRY(asinh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(asinpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
ENTRY(asinpi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY_EXT(atan, 5.0f, 5.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
ENTRY_EXT(atan, 5.0f, 5.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
|
||||||
ENTRY(atanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
ENTRY(atanh, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(atanpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
ENTRY(atanpi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(atan2, 6.0f, 6.0f, FTZ_OFF, binaryF),
|
ENTRY(atan2, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
|
||||||
ENTRY(atan2pi, 6.0f, 6.0f, FTZ_OFF, binaryF),
|
ENTRY(atan2pi, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
|
||||||
ENTRY(cbrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
ENTRY(cbrt, 2.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
|
||||||
ENTRY(ceil, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
ENTRY(ceil, 0.0f, 0.0f, 0.f, FTZ_OFF, unaryF),
|
||||||
ENTRY(copysign, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
ENTRY(copysign, 0.0f, 0.0f, 0.f, FTZ_OFF, binaryF),
|
||||||
ENTRY_EXT(cos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
ENTRY_EXT(cos, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||||
0.00048828125f), // relaxed ulp 2^-11
|
0.00048828125f), // relaxed ulp 2^-11
|
||||||
ENTRY(cosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
ENTRY(cosh, 4.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
|
||||||
ENTRY_EXT(cospi, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
ENTRY_EXT(cospi, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||||
0.00048828125f), // relaxed ulp 2^-11
|
0.00048828125f), // relaxed ulp 2^-11
|
||||||
// ENTRY( erfc, 16.0f,
|
// ENTRY( erfc, 16.0f,
|
||||||
// 16.0f, FTZ_OFF, unaryF),
|
// 16.0f, FTZ_OFF, unaryF),
|
||||||
@@ -225,81 +263,84 @@ const Func functionList[] = {
|
|||||||
// 16.0f, 16.0f, FTZ_OFF,
|
// 16.0f, 16.0f, FTZ_OFF,
|
||||||
// unaryF), //disabled for 1.0 due to lack
|
// unaryF), //disabled for 1.0 due to lack
|
||||||
// of reference implementation
|
// of reference implementation
|
||||||
ENTRY_EXT(exp, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF,
|
ENTRY_EXT(exp, 3.0f, 4.0f, 2.f, 3.0f, FTZ_OFF, unaryF,
|
||||||
4.0f), // relaxed error is actually overwritten in unary.c as it
|
4.0f), // relaxed error is actually overwritten in unary.c as it
|
||||||
// is 3+floor(fabs(2*x))
|
// is 3+floor(fabs(2*x))
|
||||||
ENTRY_EXT(exp2, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF,
|
ENTRY_EXT(exp2, 3.0f, 4.0f, 2.f, 3.0f, FTZ_OFF, unaryF,
|
||||||
4.0f), // relaxed error is actually overwritten in unary.c as it
|
4.0f), // relaxed error is actually overwritten in unary.c as it
|
||||||
// is 3+floor(fabs(2*x))
|
// is 3+floor(fabs(2*x))
|
||||||
ENTRY_EXT(exp10, 3.0f, 4.0f, 8192.0f, FTZ_OFF, unaryF,
|
ENTRY_EXT(exp10, 3.0f, 4.0f, 2.f, 8192.0f, FTZ_OFF, unaryF,
|
||||||
8192.0f), // relaxed error is actually overwritten in unary.c as
|
8192.0f), // relaxed error is actually overwritten in unary.c as
|
||||||
// it is 3+floor(fabs(2*x)) in derived mode,
|
// it is 3+floor(fabs(2*x)) in derived mode,
|
||||||
// in non-derived mode it uses the ulp error for half_exp10.
|
// in non-derived mode it uses the ulp error for half_exp10.
|
||||||
ENTRY(expm1, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
ENTRY(expm1, 3.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
|
||||||
ENTRY(fabs, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
ENTRY(fabs, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(fdim, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
ENTRY(fdim, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||||
ENTRY(floor, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
ENTRY(floor, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(fma, 0.0f, 0.0f, FTZ_OFF, ternaryF),
|
ENTRY(fma, 0.0f, 0.0f, 0.0f, FTZ_OFF, ternaryF),
|
||||||
ENTRY(fmax, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
ENTRY(fmax, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||||
ENTRY(fmin, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
ENTRY(fmin, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||||
ENTRY(fmod, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
ENTRY(fmod, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||||
ENTRY(fract, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
|
ENTRY(fract, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
|
||||||
ENTRY(frexp, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
|
ENTRY(frexp, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
|
||||||
ENTRY(hypot, 4.0f, 4.0f, FTZ_OFF, binaryF),
|
ENTRY(hypot, 4.0f, 4.0f, 2.0f, FTZ_OFF, binaryF),
|
||||||
ENTRY(ilogb, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
|
ENTRY(ilogb, 0.0f, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
|
||||||
ENTRY(isequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
ENTRY(isequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||||
ENTRY(isfinite, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
ENTRY(isfinite, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||||
ENTRY(isgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
ENTRY(isgreater, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||||
ENTRY(isgreaterequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
ENTRY(isgreaterequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||||
ENTRY(isinf, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
ENTRY(isinf, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||||
ENTRY(isless, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
ENTRY(isless, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||||
ENTRY(islessequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
ENTRY(islessequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||||
ENTRY(islessgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
ENTRY(islessgreater, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||||
ENTRY(isnan, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
ENTRY(isnan, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||||
ENTRY(isnormal, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
ENTRY(isnormal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||||
ENTRY(isnotequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
ENTRY(isnotequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||||
ENTRY(isordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
ENTRY(isordered, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||||
ENTRY(isunordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
ENTRY(isunordered, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||||
ENTRY(ldexp, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
|
ENTRY(ldexp, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
|
||||||
ENTRY(lgamma, INFINITY, INFINITY, FTZ_OFF, unaryF),
|
ENTRY(lgamma, INFINITY, INFINITY, INFINITY, FTZ_OFF, unaryF),
|
||||||
ENTRY(lgamma_r, INFINITY, INFINITY, FTZ_OFF, unaryF_two_results_i),
|
ENTRY(lgamma_r, INFINITY, INFINITY, INFINITY, FTZ_OFF,
|
||||||
ENTRY_EXT(log, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
unaryF_two_results_i),
|
||||||
|
ENTRY_EXT(log, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
||||||
4.76837158203125e-7f), // relaxed ulp 2^-21
|
4.76837158203125e-7f), // relaxed ulp 2^-21
|
||||||
ENTRY_EXT(log2, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
ENTRY_EXT(log2, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
||||||
4.76837158203125e-7f), // relaxed ulp 2^-21
|
4.76837158203125e-7f), // relaxed ulp 2^-21
|
||||||
ENTRY_EXT(log10, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
ENTRY_EXT(log10, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
|
||||||
4.76837158203125e-7f), // relaxed ulp 2^-21
|
4.76837158203125e-7f), // relaxed ulp 2^-21
|
||||||
ENTRY(log1p, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
ENTRY(log1p, 2.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(logb, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
ENTRY(logb, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, FTZ_OFF, mad_function,
|
ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF,
|
||||||
|
mad_function,
|
||||||
INFINITY), // in fast-relaxed-math mode it has to be either
|
INFINITY), // in fast-relaxed-math mode it has to be either
|
||||||
// exactly rounded fma or exactly rounded a*b+c
|
// exactly rounded fma or exactly rounded a*b+c
|
||||||
ENTRY(maxmag, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
ENTRY(maxmag, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||||
ENTRY(minmag, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
ENTRY(minmag, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||||
ENTRY(modf, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
|
ENTRY(modf, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
|
||||||
ENTRY(nan, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
|
ENTRY(nan, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
|
||||||
ENTRY(nextafter, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
ENTRY(nextafter, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_nextafter),
|
||||||
ENTRY_EXT(pow, 16.0f, 16.0f, 8192.0f, FTZ_OFF, binaryF,
|
ENTRY_EXT(pow, 16.0f, 16.0f, 4.0f, 8192.0f, FTZ_OFF, binaryF,
|
||||||
8192.0f), // in derived mode the ulp error is calculated as
|
8192.0f), // in derived mode the ulp error is calculated as
|
||||||
// exp2(y*log2(x)) and in non-derived it is the same as
|
// exp2(y*log2(x)) and in non-derived it is the same as
|
||||||
// half_pow
|
// half_pow
|
||||||
ENTRY(pown, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
|
ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
|
||||||
ENTRY(powr, 16.0f, 16.0f, FTZ_OFF, binaryF),
|
ENTRY(powr, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF),
|
||||||
// ENTRY( reciprocal, 1.0f,
|
// ENTRY( reciprocal, 1.0f,
|
||||||
// 1.0f, FTZ_OFF, unaryF),
|
// 1.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(remainder, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||||
ENTRY(remquo, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
|
ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
|
||||||
ENTRY(rint, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(rootn, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
|
ENTRY(rootn, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
|
||||||
ENTRY(round, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
ENTRY(round, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(rsqrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
ENTRY(rsqrt, 2.0f, 4.0f, 1.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(signbit, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
ENTRY(signbit, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||||
ENTRY_EXT(sin, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
ENTRY_EXT(sin, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||||
0.00048828125f), // relaxed ulp 2^-11
|
0.00048828125f), // relaxed ulp 2^-11
|
||||||
ENTRY_EXT(sincos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF_two_results,
|
ENTRY_EXT(sincos, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF,
|
||||||
|
unaryF_two_results,
|
||||||
0.00048828125f), // relaxed ulp 2^-11
|
0.00048828125f), // relaxed ulp 2^-11
|
||||||
ENTRY(sinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
ENTRY(sinh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY_EXT(sinpi, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
ENTRY_EXT(sinpi, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
|
||||||
0.00048828125f), // relaxed ulp 2^-11
|
0.00048828125f), // relaxed ulp 2^-11
|
||||||
{ "sqrt",
|
{ "sqrt",
|
||||||
"sqrt",
|
"sqrt",
|
||||||
@@ -308,6 +349,7 @@ const Func functionList[] = {
|
|||||||
{ NULL },
|
{ NULL },
|
||||||
3.0f,
|
3.0f,
|
||||||
0.0f,
|
0.0f,
|
||||||
|
0.0f,
|
||||||
4.0f,
|
4.0f,
|
||||||
INFINITY,
|
INFINITY,
|
||||||
INFINITY,
|
INFINITY,
|
||||||
@@ -322,41 +364,42 @@ const Func functionList[] = {
|
|||||||
0.0f,
|
0.0f,
|
||||||
0.0f,
|
0.0f,
|
||||||
0.0f,
|
0.0f,
|
||||||
|
0.0f,
|
||||||
INFINITY,
|
INFINITY,
|
||||||
INFINITY,
|
INFINITY,
|
||||||
FTZ_OFF,
|
FTZ_OFF,
|
||||||
RELAXED_OFF,
|
RELAXED_OFF,
|
||||||
unaryF },
|
unaryF },
|
||||||
ENTRY_EXT(
|
ENTRY_EXT(
|
||||||
tan, 5.0f, 5.0f, 8192.0f, FTZ_OFF, unaryF,
|
tan, 5.0f, 5.0f, 2.0f, 8192.0f, FTZ_OFF, unaryF,
|
||||||
8192.0f), // in derived mode it the ulp error is calculated as sin/cos
|
8192.0f), // in derived mode it the ulp error is calculated as sin/cos
|
||||||
// and in non-derived mode it is the same as half_tan.
|
// and in non-derived mode it is the same as half_tan.
|
||||||
ENTRY(tanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
ENTRY(tanh, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
|
||||||
ENTRY(tanpi, 6.0f, 6.0f, FTZ_OFF, unaryF),
|
ENTRY(tanpi, 6.0f, 6.0f, 2.0f, FTZ_OFF, unaryF),
|
||||||
// ENTRY( tgamma, 16.0f,
|
// ENTRY( tgamma, 16.0f,
|
||||||
// 16.0f, FTZ_OFF, unaryF),
|
// 16.0f, FTZ_OFF, unaryF),
|
||||||
// // Commented this out until we can be
|
// // Commented this out until we can be
|
||||||
// sure this requirement is realistic
|
// sure this requirement is realistic
|
||||||
ENTRY(trunc, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
ENTRY(trunc, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||||
|
|
||||||
HALF_ENTRY(cos, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(cos, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(divide, 8192.0f, 8192.0f, FTZ_ON, binaryF),
|
HALF_ENTRY(divide, 8192.0f, 8192.0f, FTZ_ON, binaryOF),
|
||||||
HALF_ENTRY(exp, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(exp, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(exp2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(exp2, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(exp10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(exp10, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(log, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(log, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(log2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(log2, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(log10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(log10, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(powr, 8192.0f, 8192.0f, FTZ_ON, binaryF),
|
HALF_ENTRY(powr, 8192.0f, 8192.0f, FTZ_ON, binaryOF),
|
||||||
HALF_ENTRY(recip, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(recip, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(sin, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(sin, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
HALF_ENTRY(tan, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
HALF_ENTRY(tan, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
|
||||||
|
|
||||||
// basic operations
|
// basic operations
|
||||||
OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||||
OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||||
{ "divide",
|
{ "divide",
|
||||||
"/",
|
"/",
|
||||||
{ (void*)reference_divide },
|
{ (void*)reference_divide },
|
||||||
@@ -364,6 +407,7 @@ const Func functionList[] = {
|
|||||||
{ (void*)reference_relaxed_divide },
|
{ (void*)reference_relaxed_divide },
|
||||||
2.5f,
|
2.5f,
|
||||||
0.0f,
|
0.0f,
|
||||||
|
0.0f,
|
||||||
3.0f,
|
3.0f,
|
||||||
2.5f,
|
2.5f,
|
||||||
INFINITY,
|
INFINITY,
|
||||||
@@ -378,15 +422,17 @@ const Func functionList[] = {
|
|||||||
0.0f,
|
0.0f,
|
||||||
0.0f,
|
0.0f,
|
||||||
0.0f,
|
0.0f,
|
||||||
|
0.0f,
|
||||||
0.f,
|
0.f,
|
||||||
INFINITY,
|
INFINITY,
|
||||||
FTZ_OFF,
|
FTZ_OFF,
|
||||||
RELAXED_OFF,
|
RELAXED_OFF,
|
||||||
binaryOperatorF },
|
binaryOperatorF },
|
||||||
OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||||
OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, FTZ_OFF,
|
OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, 0.0f, FTZ_OFF,
|
||||||
unaryF), // A simple copy operation
|
unaryF), // A simple copy operation
|
||||||
OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||||
};
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
const size_t functionListCount = sizeof(functionList) / sizeof(functionList[0]);
|
const size_t functionListCount = sizeof(functionList) / sizeof(functionList[0]);
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -70,6 +70,9 @@ struct vtbl
|
|||||||
int (*DoubleTestFunc)(
|
int (*DoubleTestFunc)(
|
||||||
const struct Func *, MTdata,
|
const struct Func *, MTdata,
|
||||||
bool); // may be NULL if function is single precision only
|
bool); // may be NULL if function is single precision only
|
||||||
|
int (*HalfTestFunc)(
|
||||||
|
const struct Func *, MTdata,
|
||||||
|
bool); // may be NULL if function is single precision only
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Func
|
struct Func
|
||||||
@@ -82,6 +85,7 @@ struct Func
|
|||||||
fptr rfunc;
|
fptr rfunc;
|
||||||
float float_ulps;
|
float float_ulps;
|
||||||
float double_ulps;
|
float double_ulps;
|
||||||
|
float half_ulps;
|
||||||
float float_embedded_ulps;
|
float float_embedded_ulps;
|
||||||
float relaxed_error;
|
float relaxed_error;
|
||||||
float relaxed_embedded_error;
|
float relaxed_embedded_error;
|
||||||
|
|||||||
220
test_conformance/math_brute_force/i_unary_half.cpp
Normal file
220
test_conformance/math_brute_force/i_unary_half.cpp
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstring>
|
||||||
|
#include <memory>
|
||||||
|
#include <cinttypes>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
static cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED,
|
||||||
|
void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
|
||||||
|
ParameterType::Half, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_Int_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
Programs programs;
|
||||||
|
KernelMatrix kernels;
|
||||||
|
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||||
|
int ftz = f->ftz || 0 == (gHalfCapabilities & CL_FP_DENORM) || gForceFTZ;
|
||||||
|
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||||
|
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_int),
|
||||||
|
size_t(1ULL << (sizeof(cl_half) * 8)));
|
||||||
|
size_t bufferSizeIn = bufferElements * sizeof(cl_half);
|
||||||
|
size_t bufferSizeOut = bufferElements * sizeof(cl_int);
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
// This test is not using ThreadPool so we need to disable FTZ here
|
||||||
|
// for reference computations
|
||||||
|
FPU_mode_type oldMode;
|
||||||
|
DisableFTZ(&oldMode);
|
||||||
|
std::shared_ptr<int> at_scope_exit(
|
||||||
|
nullptr, [&oldMode](int *) { RestoreFPState(&oldMode); });
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
{
|
||||||
|
BuildKernelInfo build_info = { 1, kernels, programs, f->nameInCode };
|
||||||
|
if ((error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info)))
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
std::vector<float> s(bufferElements);
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < (1ULL << 16); i += step)
|
||||||
|
{
|
||||||
|
// Init input array
|
||||||
|
cl_ushort *p = (cl_ushort *)gIn;
|
||||||
|
|
||||||
|
for (size_t j = 0; j < bufferElements; j++) p[j] = (cl_ushort)i + j;
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
bufferSizeIn, gIn, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// write garbage into output arrays
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(gOut[j], &pattern, bufferSizeOut);
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||||
|
CL_FALSE, 0, bufferSizeOut,
|
||||||
|
gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||||
|
error, j);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||||
|
sizeof(pattern), 0, bufferSizeOut,
|
||||||
|
0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the kernels
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
size_t vectorSize = sizeValues[j] * sizeof(cl_int);
|
||||||
|
size_t localCount = (bufferSizeOut + vectorSize - 1) / vectorSize;
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||||
|
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
|
||||||
|
sizeof(gInBuffer), &gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||||
|
1, NULL, &localCount, NULL, 0,
|
||||||
|
NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
int *r = (int *)gOut_Ref;
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
{
|
||||||
|
s[j] = HTF(p[j]);
|
||||||
|
r[j] = f->func.i_f(s[j]);
|
||||||
|
}
|
||||||
|
// Read the data back
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
|
bufferSizeOut, gOut[j], 0, NULL,
|
||||||
|
NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("ReadArray failed %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting) break;
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
{
|
||||||
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
|
{
|
||||||
|
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (t[j] != q[j])
|
||||||
|
{
|
||||||
|
if (ftz && IsHalfSubnormal(p[j]))
|
||||||
|
{
|
||||||
|
unsigned int correct0 = f->func.i_f(0.0);
|
||||||
|
unsigned int correct1 = f->func.i_f(-0.0);
|
||||||
|
if (q[j] == correct0 || q[j] == correct1) continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t err = t[j] - q[j];
|
||||||
|
if (q[j] > t[j]) err = q[j] - t[j];
|
||||||
|
vlog_error("\nERROR: %s%s: %d ulp error at %a (0x%04x): "
|
||||||
|
"*%d vs. %d\n",
|
||||||
|
f->name, sizeNames[k], err, s[j], p[j], t[j],
|
||||||
|
q[j]);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == (i & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14" PRIu64 " step:%10" PRIu64
|
||||||
|
" bufferSize:%10zd \n",
|
||||||
|
i, step, bufferSizeOut);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
540
test_conformance/math_brute_force/macro_binary_half.cpp
Normal file
540
test_conformance/math_brute_force/macro_binary_half.cpp
Normal file
@@ -0,0 +1,540 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetBinaryKernel(kernel_name, builtin, ParameterType::Short,
|
||||||
|
ParameterType::Half, ParameterType::Half,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ThreadInfo
|
||||||
|
{
|
||||||
|
clMemWrapper inBuf; // input buffer for the thread
|
||||||
|
clMemWrapper inBuf2; // input buffer for the thread
|
||||||
|
clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||||
|
MTdataHolder d;
|
||||||
|
clCommandQueueWrapper
|
||||||
|
tQueue; // per thread command queue to improve performance
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TestInfoBase
|
||||||
|
{
|
||||||
|
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||||
|
const Func *f; // A pointer to the function info
|
||||||
|
|
||||||
|
cl_uint threadCount; // Number of worker threads
|
||||||
|
cl_uint jobCount; // Number of jobs
|
||||||
|
cl_uint step; // step between each chunk and the next.
|
||||||
|
cl_uint scale; // stride between individual test values
|
||||||
|
int ftz; // non-zero if running in flush to zero mode
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TestInfo : public TestInfoBase
|
||||||
|
{
|
||||||
|
TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
|
||||||
|
|
||||||
|
// Array of thread specific information
|
||||||
|
std::vector<ThreadInfo> tinfo;
|
||||||
|
|
||||||
|
// Programs for various vector sizes.
|
||||||
|
Programs programs;
|
||||||
|
|
||||||
|
// Thread-specific kernels for each vector size:
|
||||||
|
// k[vector_size][thread_id]
|
||||||
|
KernelMatrix k;
|
||||||
|
};
|
||||||
|
|
||||||
|
// A table of more difficult cases to get right
|
||||||
|
const cl_half specialValuesHalf[] = {
|
||||||
|
0xffff, 0x0000, 0x0001, 0x7c00, /*INFINITY*/
|
||||||
|
0xfc00, /*-INFINITY*/
|
||||||
|
0x8000, /*-0*/
|
||||||
|
0x7bff, /*HALF_MAX*/
|
||||||
|
0x0400, /*HALF_MIN*/
|
||||||
|
0x03ff, /* Largest denormal */
|
||||||
|
0x3c00, /* 1 */
|
||||||
|
0xbc00, /* -1 */
|
||||||
|
0x3555, /*nearest value to 1/3*/
|
||||||
|
0x3bff, /*largest number less than one*/
|
||||||
|
0xc000, /* -2 */
|
||||||
|
0xfbff, /* -HALF_MAX */
|
||||||
|
0x8400, /* -HALF_MIN */
|
||||||
|
0x4248, /* M_PI_H */
|
||||||
|
0xc248, /* -M_PI_H */
|
||||||
|
0xbbff, /* Largest negative fraction */
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
|
||||||
|
|
||||||
|
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||||
|
{
|
||||||
|
TestInfo *job = (TestInfo *)data;
|
||||||
|
size_t buffer_elements = job->subBufferSize;
|
||||||
|
size_t buffer_size = buffer_elements * sizeof(cl_half);
|
||||||
|
cl_uint base = job_id * (cl_uint)job->step;
|
||||||
|
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||||
|
fptr func = job->f->func;
|
||||||
|
int ftz = job->ftz;
|
||||||
|
MTdata d = tinfo->d;
|
||||||
|
cl_uint j, k;
|
||||||
|
cl_int error;
|
||||||
|
const char *name = job->f->name;
|
||||||
|
cl_short *t, *r;
|
||||||
|
std::vector<float> s(0), s2(0);
|
||||||
|
|
||||||
|
// start the map of the output arrays
|
||||||
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
|
cl_short *out[VECTOR_SIZE_COUNT];
|
||||||
|
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// start the map of the output arrays
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
out[j] = (cl_short *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
|
buffer_size, 0, NULL, e + j, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init input array
|
||||||
|
cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
|
||||||
|
cl_ushort *p2 = (cl_ushort *)gIn2 + thread_id * buffer_elements;
|
||||||
|
j = 0;
|
||||||
|
int totalSpecialValueCount =
|
||||||
|
specialValuesHalfCount * specialValuesHalfCount;
|
||||||
|
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||||
|
|
||||||
|
if (job_id <= (cl_uint)indx)
|
||||||
|
{ // test edge cases
|
||||||
|
uint32_t x, y;
|
||||||
|
|
||||||
|
x = (job_id * buffer_elements) % specialValuesHalfCount;
|
||||||
|
y = (job_id * buffer_elements) / specialValuesHalfCount;
|
||||||
|
|
||||||
|
for (; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
p[j] = specialValuesHalf[x];
|
||||||
|
p2[j] = specialValuesHalf[y];
|
||||||
|
if (++x >= specialValuesHalfCount)
|
||||||
|
{
|
||||||
|
x = 0;
|
||||||
|
y++;
|
||||||
|
if (y >= specialValuesHalfCount) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init any remaining values.
|
||||||
|
for (; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
p[j] = (cl_ushort)genrand_int32(d);
|
||||||
|
p2[j] = (cl_ushort)genrand_int32(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
|
buffer_size, p, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
|
||||||
|
buffer_size, p2, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// Wait for the map to finish
|
||||||
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clReleaseEvent(e[j])))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill the result buffer with garbage, so that old results don't carry
|
||||||
|
// over
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(out[j], &pattern, buffer_size);
|
||||||
|
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
&pattern, sizeof(pattern), 0,
|
||||||
|
buffer_size, 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// run the kernel
|
||||||
|
size_t vectorCount =
|
||||||
|
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||||
|
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||||
|
// own copy of the cl_kernel
|
||||||
|
cl_program program = job->programs[j];
|
||||||
|
|
||||||
|
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||||
|
&tinfo->outBuf[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||||
|
&tinfo->inBuf)))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
|
||||||
|
&tinfo->inBuf2)))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||||
|
&vectorCount, NULL, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting) return CL_SUCCESS;
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
r = (cl_short *)gOut_Ref + thread_id * buffer_elements;
|
||||||
|
t = (cl_short *)r;
|
||||||
|
s.resize(buffer_elements);
|
||||||
|
s2.resize(buffer_elements);
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
s[j] = cl_half_to_float(p[j]);
|
||||||
|
s2[j] = cl_half_to_float(p2[j]);
|
||||||
|
r[j] = (short)func.i_ff(s[j], s2[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the data back -- no need to wait for the first N-1 buffers. This is
|
||||||
|
// an in order queue.
|
||||||
|
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
out[j] = (cl_short *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
|
||||||
|
buffer_size, 0, NULL, NULL, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the last buffer
|
||||||
|
out[j] = (cl_short *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
CL_TRUE, CL_MAP_READ, 0,
|
||||||
|
buffer_size, 0, NULL, NULL, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
cl_short *q = (cl_short *)out[0];
|
||||||
|
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (gMinVectorSizeIndex == 0 && t[j] != q[j])
|
||||||
|
{
|
||||||
|
if (ftz)
|
||||||
|
{
|
||||||
|
if (IsHalfSubnormal(p[j]))
|
||||||
|
{
|
||||||
|
if (IsHalfSubnormal(p2[j]))
|
||||||
|
{
|
||||||
|
short correct = (short)func.i_ff(0.0f, 0.0f);
|
||||||
|
short correct2 = (short)func.i_ff(0.0f, -0.0f);
|
||||||
|
short correct3 = (short)func.i_ff(-0.0f, 0.0f);
|
||||||
|
short correct4 = (short)func.i_ff(-0.0f, -0.0f);
|
||||||
|
|
||||||
|
if (correct == q[j] || correct2 == q[j]
|
||||||
|
|| correct3 == q[j] || correct4 == q[j])
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
short correct = (short)func.i_ff(0.0f, s2[j]);
|
||||||
|
short correct2 = (short)func.i_ff(-0.0f, s2[j]);
|
||||||
|
if (correct == q[j] || correct2 == q[j]) continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IsHalfSubnormal(p2[j]))
|
||||||
|
{
|
||||||
|
short correct = (short)func.i_ff(s[j], 0.0f);
|
||||||
|
short correct2 = (short)func.i_ff(s[j], -0.0f);
|
||||||
|
if (correct == q[j] || correct2 == q[j]) continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
short err = t[j] - q[j];
|
||||||
|
if (q[j] > t[j]) err = q[j] - t[j];
|
||||||
|
vlog_error(
|
||||||
|
"\nERROR: %s: %d ulp error at {%a (0x%04x), %a "
|
||||||
|
"(0x%04x)}\nExpected: 0x%04x \nActual: 0x%04x (index: %d)\n",
|
||||||
|
name, err, s[j], p[j], s2[j], p2[j], t[j], q[j], j);
|
||||||
|
error = -1;
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for (k = std::max(1U, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
|
||||||
|
k++)
|
||||||
|
{
|
||||||
|
q = out[k];
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (-t[j] != q[j])
|
||||||
|
{
|
||||||
|
if (ftz)
|
||||||
|
{
|
||||||
|
if (IsHalfSubnormal(p[j]))
|
||||||
|
{
|
||||||
|
if (IsHalfSubnormal(p2[j]))
|
||||||
|
{
|
||||||
|
short correct = (short)-func.i_ff(0.0f, 0.0f);
|
||||||
|
short correct2 = (short)-func.i_ff(0.0f, -0.0f);
|
||||||
|
short correct3 = (short)-func.i_ff(-0.0f, 0.0f);
|
||||||
|
short correct4 = (short)-func.i_ff(-0.0f, -0.0f);
|
||||||
|
|
||||||
|
if (correct == q[j] || correct2 == q[j]
|
||||||
|
|| correct3 == q[j] || correct4 == q[j])
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
short correct = (short)-func.i_ff(0.0f, s2[j]);
|
||||||
|
short correct2 = (short)-func.i_ff(-0.0f, s2[j]);
|
||||||
|
if (correct == q[j] || correct2 == q[j]) continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IsHalfSubnormal(p2[j]))
|
||||||
|
{
|
||||||
|
short correct = (short)-func.i_ff(s[j], 0.0f);
|
||||||
|
short correct2 = (short)-func.i_ff(s[j], -0.0f);
|
||||||
|
if (correct == q[j] || correct2 == q[j]) continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_ushort err = -t[j] - q[j];
|
||||||
|
if (q[j] > -t[j]) err = q[j] + t[j];
|
||||||
|
vlog_error("\nERROR: %s: %d ulp error at {%a (0x%04x), %a "
|
||||||
|
"(0x%04x)}\nExpected: 0x%04x \nActual: 0x%04x "
|
||||||
|
"(index: %d)\n",
|
||||||
|
name, err, s[j], p[j], s2[j], p2[j], -t[j], q[j], j);
|
||||||
|
error = -1;
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||||
|
j, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||||
|
|
||||||
|
|
||||||
|
if (0 == (base & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
|
||||||
|
"ThreadCount:%2u\n",
|
||||||
|
base, job->step, job->scale, buffer_elements,
|
||||||
|
job->threadCount);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestMacro_Int_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
TestInfoBase test_info_base;
|
||||||
|
cl_int error;
|
||||||
|
size_t i, j;
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
|
||||||
|
// Init test_info
|
||||||
|
memset(&test_info_base, 0, sizeof(test_info_base));
|
||||||
|
TestInfo test_info(test_info_base);
|
||||||
|
|
||||||
|
test_info.threadCount = GetThreadCount();
|
||||||
|
test_info.subBufferSize = BUFFER_SIZE
|
||||||
|
/ (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||||
|
test_info.scale = getTestScale(sizeof(cl_half));
|
||||||
|
|
||||||
|
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||||
|
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||||
|
{
|
||||||
|
// there was overflow
|
||||||
|
test_info.jobCount = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
|
||||||
|
}
|
||||||
|
|
||||||
|
test_info.f = f;
|
||||||
|
test_info.ftz =
|
||||||
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
|
||||||
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
|
|
||||||
|
for (i = 0; i < test_info.threadCount; i++)
|
||||||
|
{
|
||||||
|
cl_buffer_region region = { i * test_info.subBufferSize
|
||||||
|
* sizeof(cl_half),
|
||||||
|
test_info.subBufferSize * sizeof(cl_half) };
|
||||||
|
test_info.tinfo[i].inBuf =
|
||||||
|
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||||
|
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||||
|
"region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
test_info.tinfo[i].inBuf2 =
|
||||||
|
clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
|
||||||
|
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].inBuf2)
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
|
||||||
|
"region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
|
®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
|
||||||
|
"for region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
test_info.tinfo[i].tQueue =
|
||||||
|
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||||
|
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||||
|
{
|
||||||
|
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
{
|
||||||
|
BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
|
||||||
|
test_info.programs, f->nameInCode };
|
||||||
|
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info);
|
||||||
|
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
|
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||||
|
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
427
test_conformance/math_brute_force/macro_unary_half.cpp
Normal file
427
test_conformance/math_brute_force/macro_unary_half.cpp
Normal file
@@ -0,0 +1,427 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetUnaryKernel(kernel_name, builtin, ParameterType::Short,
|
||||||
|
ParameterType::Half, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Thread specific data for a worker thread
|
||||||
|
struct ThreadInfo
|
||||||
|
{
|
||||||
|
clMemWrapper inBuf; // input buffer for the thread
|
||||||
|
clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||||
|
clCommandQueueWrapper
|
||||||
|
tQueue; // per thread command queue to improve performance
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TestInfoBase
|
||||||
|
{
|
||||||
|
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||||
|
const Func *f; // A pointer to the function info
|
||||||
|
cl_uint threadCount; // Number of worker threads
|
||||||
|
cl_uint jobCount; // Number of jobs
|
||||||
|
cl_uint step; // step between each chunk and the next.
|
||||||
|
cl_uint scale; // stride between individual test values
|
||||||
|
int ftz; // non-zero if running in flush to zero mode
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TestInfo : public TestInfoBase
|
||||||
|
{
|
||||||
|
TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
|
||||||
|
|
||||||
|
// Array of thread specific information
|
||||||
|
std::vector<ThreadInfo> tinfo;
|
||||||
|
|
||||||
|
// Programs for various vector sizes.
|
||||||
|
Programs programs;
|
||||||
|
|
||||||
|
// Thread-specific kernels for each vector size:
|
||||||
|
// k[vector_size][thread_id]
|
||||||
|
KernelMatrix k;
|
||||||
|
};
|
||||||
|
|
||||||
|
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||||
|
{
|
||||||
|
TestInfo *job = (TestInfo *)data;
|
||||||
|
size_t buffer_elements = job->subBufferSize;
|
||||||
|
size_t buffer_size = buffer_elements * sizeof(cl_half);
|
||||||
|
cl_uint scale = job->scale;
|
||||||
|
cl_uint base = job_id * (cl_uint)job->step;
|
||||||
|
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||||
|
fptr func = job->f->func;
|
||||||
|
int ftz = job->ftz;
|
||||||
|
cl_uint j, k;
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
const char *name = job->f->name;
|
||||||
|
std::vector<float> s(0);
|
||||||
|
|
||||||
|
int signbit_test = 0;
|
||||||
|
if (!strcmp(name, "signbit")) signbit_test = 1;
|
||||||
|
|
||||||
|
#define ref_func(s) (signbit_test ? func.i_f_f(s) : func.i_f(s))
|
||||||
|
|
||||||
|
// start the map of the output arrays
|
||||||
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
|
cl_short *out[VECTOR_SIZE_COUNT];
|
||||||
|
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// start the map of the output arrays
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
out[j] = (cl_short *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
|
buffer_size, 0, NULL, e + j, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the new values to the input array
|
||||||
|
cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
|
||||||
|
for (j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
|
buffer_size, p, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// Wait for the map to finish
|
||||||
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clReleaseEvent(e[j])))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill the result buffer with garbage, so that old results don't carry
|
||||||
|
// over
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(out[j], &pattern, buffer_size);
|
||||||
|
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
&pattern, sizeof(pattern), 0,
|
||||||
|
buffer_size, 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// run the kernel
|
||||||
|
size_t vectorCount =
|
||||||
|
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||||
|
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||||
|
// own copy of the cl_kernel
|
||||||
|
cl_program program = job->programs[j];
|
||||||
|
|
||||||
|
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||||
|
&tinfo->outBuf[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||||
|
&tinfo->inBuf)))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||||
|
&vectorCount, NULL, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting) return CL_SUCCESS;
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
cl_short *r = (cl_short *)gOut_Ref + thread_id * buffer_elements;
|
||||||
|
cl_short *t = (cl_short *)r;
|
||||||
|
s.resize(buffer_elements);
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
s[j] = cl_half_to_float(p[j]);
|
||||||
|
if (!strcmp(name, "isnormal"))
|
||||||
|
{
|
||||||
|
if ((IsHalfSubnormal(p[j]) == 0) && !((p[j] & 0x7fffU) >= 0x7c00U)
|
||||||
|
&& ((p[j] & 0x7fffU) != 0x0000U))
|
||||||
|
r[j] = 1;
|
||||||
|
else
|
||||||
|
r[j] = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
r[j] = (short)ref_func(s[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the data back -- no need to wait for the first N-1 buffers. This is
|
||||||
|
// an in order queue.
|
||||||
|
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
out[j] = (cl_short *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
|
||||||
|
buffer_size, 0, NULL, NULL, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Wait for the last buffer
|
||||||
|
out[j] = (cl_short *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
CL_TRUE, CL_MAP_READ, 0,
|
||||||
|
buffer_size, 0, NULL, NULL, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
cl_short *q = out[0];
|
||||||
|
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (gMinVectorSizeIndex == 0 && t[j] != q[j])
|
||||||
|
{
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (ftz)
|
||||||
|
{
|
||||||
|
if (IsHalfSubnormal(p[j]))
|
||||||
|
{
|
||||||
|
short correct = (short)ref_func(+0.0f);
|
||||||
|
short correct2 = (short)ref_func(-0.0f);
|
||||||
|
if (correct == q[j] || correct2 == q[j]) continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
short err = t[j] - q[j];
|
||||||
|
if (q[j] > t[j]) err = q[j] - t[j];
|
||||||
|
vlog_error("\nERROR: %s: %d ulp error at %a (0x%04x)\nExpected: "
|
||||||
|
"%d vs. %d\n",
|
||||||
|
name, err, s[j], p[j], t[j], q[j]);
|
||||||
|
error = -1;
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for (k = std::max(1U, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
|
||||||
|
k++)
|
||||||
|
{
|
||||||
|
q = out[k];
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (-t[j] != q[j])
|
||||||
|
{
|
||||||
|
if (ftz)
|
||||||
|
{
|
||||||
|
if (IsHalfSubnormal(p[j]))
|
||||||
|
{
|
||||||
|
short correct = (short)-ref_func(+0.0f);
|
||||||
|
short correct2 = (short)-ref_func(-0.0f);
|
||||||
|
if (correct == q[j] || correct2 == q[j]) continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
short err = -t[j] - q[j];
|
||||||
|
if (q[j] > -t[j]) err = q[j] + t[j];
|
||||||
|
vlog_error("\nERROR: %s%s: %d ulp error at %a "
|
||||||
|
"(0x%04x)\nExpected: %d \nActual: %d\n",
|
||||||
|
name, sizeNames[k], err, s[j], p[j], -t[j], q[j]);
|
||||||
|
error = -1;
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||||
|
j, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||||
|
|
||||||
|
if (0 == (base & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
|
||||||
|
"ThreadCount:%2u\n",
|
||||||
|
base, job->step, job->scale, buffer_elements,
|
||||||
|
job->threadCount);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestMacro_Int_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
TestInfoBase test_info_base;
|
||||||
|
cl_int error;
|
||||||
|
size_t i, j;
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
// Init test_info
|
||||||
|
memset(&test_info_base, 0, sizeof(test_info_base));
|
||||||
|
TestInfo test_info(test_info_base);
|
||||||
|
|
||||||
|
test_info.threadCount = GetThreadCount();
|
||||||
|
test_info.subBufferSize = BUFFER_SIZE
|
||||||
|
/ (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||||
|
test_info.scale = getTestScale(sizeof(cl_half));
|
||||||
|
|
||||||
|
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||||
|
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||||
|
{
|
||||||
|
// there was overflow
|
||||||
|
test_info.jobCount = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
test_info.jobCount =
|
||||||
|
std::max((cl_uint)1,
|
||||||
|
(cl_uint)((1ULL << sizeof(cl_half) * 8) / test_info.step));
|
||||||
|
}
|
||||||
|
|
||||||
|
test_info.f = f;
|
||||||
|
test_info.ftz =
|
||||||
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
|
||||||
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
|
|
||||||
|
for (i = 0; i < test_info.threadCount; i++)
|
||||||
|
{
|
||||||
|
cl_buffer_region region = { i * test_info.subBufferSize
|
||||||
|
* sizeof(cl_half),
|
||||||
|
test_info.subBufferSize * sizeof(cl_half) };
|
||||||
|
test_info.tinfo[i].inBuf =
|
||||||
|
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||||
|
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||||
|
"region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
|
®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
|
||||||
|
"for region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
test_info.tinfo[i].tQueue =
|
||||||
|
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||||
|
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||||
|
{
|
||||||
|
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
{
|
||||||
|
BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
|
||||||
|
test_info.programs, f->nameInCode };
|
||||||
|
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info);
|
||||||
|
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
|
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||||
|
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
201
test_conformance/math_brute_force/mad_half.cpp
Normal file
201
test_conformance/math_brute_force/mad_half.cpp
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetTernaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||||
|
ParameterType::Half, ParameterType::Half,
|
||||||
|
ParameterType::Half, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_mad_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
Programs programs;
|
||||||
|
KernelMatrix kernels;
|
||||||
|
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||||
|
float maxError = 0.0f;
|
||||||
|
|
||||||
|
float maxErrorVal = 0.0f;
|
||||||
|
float maxErrorVal2 = 0.0f;
|
||||||
|
float maxErrorVal3 = 0.0f;
|
||||||
|
size_t bufferSize = BUFFER_SIZE;
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
uint64_t step = getTestStep(sizeof(cl_half), bufferSize);
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
{
|
||||||
|
BuildKernelInfo build_info = { 1, kernels, programs, f->nameInCode };
|
||||||
|
if ((error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info)))
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
|
{
|
||||||
|
// Init input array
|
||||||
|
cl_ushort *p = (cl_ushort *)gIn;
|
||||||
|
cl_ushort *p2 = (cl_ushort *)gIn2;
|
||||||
|
cl_ushort *p3 = (cl_ushort *)gIn3;
|
||||||
|
for (size_t j = 0; j < bufferSize / sizeof(cl_ushort); j++)
|
||||||
|
{
|
||||||
|
p[j] = (cl_ushort)genrand_int32(d);
|
||||||
|
p2[j] = (cl_ushort)genrand_int32(d);
|
||||||
|
p3[j] = (cl_ushort)genrand_int32(d);
|
||||||
|
}
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
bufferSize, gIn, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||||
|
bufferSize, gIn2, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
|
||||||
|
bufferSize, gIn3, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// write garbage into output arrays
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||||
|
CL_FALSE, 0, BUFFER_SIZE,
|
||||||
|
gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||||
|
error, j);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||||
|
sizeof(pattern), 0, BUFFER_SIZE, 0,
|
||||||
|
NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the kernels
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
size_t vectorSize = sizeof(cl_half) * sizeValues[j];
|
||||||
|
size_t localCount = (bufferSize + vectorSize - 1)
|
||||||
|
/ vectorSize; // bufferSize / vectorSize rounded up
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||||
|
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
|
||||||
|
sizeof(gInBuffer), &gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
|
||||||
|
sizeof(gInBuffer2), &gInBuffer2)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
|
||||||
|
sizeof(gInBuffer3), &gInBuffer3)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||||
|
1, NULL, &localCount, NULL, 0,
|
||||||
|
NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
||||||
|
|
||||||
|
// Read the data back
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error =
|
||||||
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
|
bufferSize, gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("ReadArray failed %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting) break;
|
||||||
|
|
||||||
|
// Verify data - no verification possible. MAD is a random number
|
||||||
|
// generator.
|
||||||
|
|
||||||
|
if (0 == (i & 0x0fffffff))
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("pass");
|
||||||
|
|
||||||
|
vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
|
||||||
|
maxErrorVal3);
|
||||||
|
}
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -49,6 +49,8 @@
|
|||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
|
|
||||||
#define kPageSize 4096
|
#define kPageSize 4096
|
||||||
|
#define HALF_REQUIRED_FEATURES_1 (CL_FP_ROUND_TO_ZERO)
|
||||||
|
#define HALF_REQUIRED_FEATURES_2 (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN)
|
||||||
#define DOUBLE_REQUIRED_FEATURES \
|
#define DOUBLE_REQUIRED_FEATURES \
|
||||||
(CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO \
|
(CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO \
|
||||||
| CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
|
| CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
|
||||||
@@ -81,6 +83,8 @@ static int gTestFastRelaxed = 1;
|
|||||||
*/
|
*/
|
||||||
int gFastRelaxedDerived = 1;
|
int gFastRelaxedDerived = 1;
|
||||||
static int gToggleCorrectlyRoundedDivideSqrt = 0;
|
static int gToggleCorrectlyRoundedDivideSqrt = 0;
|
||||||
|
int gHasHalf = 0;
|
||||||
|
cl_device_fp_config gHalfCapabilities = 0;
|
||||||
int gDeviceILogb0 = 1;
|
int gDeviceILogb0 = 1;
|
||||||
int gDeviceILogbNaN = 1;
|
int gDeviceILogbNaN = 1;
|
||||||
int gCheckTininessBeforeRounding = 1;
|
int gCheckTininessBeforeRounding = 1;
|
||||||
@@ -104,6 +108,8 @@ cl_device_fp_config gFloatCapabilities = 0;
|
|||||||
int gWimpyReductionFactor = 32;
|
int gWimpyReductionFactor = 32;
|
||||||
int gVerboseBruteForce = 0;
|
int gVerboseBruteForce = 0;
|
||||||
|
|
||||||
|
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
|
||||||
|
|
||||||
static int ParseArgs(int argc, const char **argv);
|
static int ParseArgs(int argc, const char **argv);
|
||||||
static void PrintUsage(void);
|
static void PrintUsage(void);
|
||||||
static void PrintFunctions(void);
|
static void PrintFunctions(void);
|
||||||
@@ -167,7 +173,6 @@ static int doTest(const char *name)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
if (0 == strcmp("ilogb", func_data->name))
|
if (0 == strcmp("ilogb", func_data->name))
|
||||||
{
|
{
|
||||||
@@ -236,6 +241,23 @@ static int doTest(const char *name)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (gHasHalf && NULL != func_data->vtbl_ptr->HalfTestFunc)
|
||||||
|
{
|
||||||
|
gTestCount++;
|
||||||
|
vlog("%3d: ", gTestCount);
|
||||||
|
if (func_data->vtbl_ptr->HalfTestFunc(func_data, gMTdata,
|
||||||
|
false /* relaxed mode*/))
|
||||||
|
{
|
||||||
|
gFailCount++;
|
||||||
|
error++;
|
||||||
|
if (gStopOnError)
|
||||||
|
{
|
||||||
|
gSkipRestOfTests = true;
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
@@ -408,6 +430,8 @@ static int ParseArgs(int argc, const char **argv)
|
|||||||
|
|
||||||
case 'm': singleThreaded ^= 1; break;
|
case 'm': singleThreaded ^= 1; break;
|
||||||
|
|
||||||
|
case 'g': gHasHalf ^= 1; break;
|
||||||
|
|
||||||
case 'r': gTestFastRelaxed ^= 1; break;
|
case 'r': gTestFastRelaxed ^= 1; break;
|
||||||
|
|
||||||
case 's': gStopOnError ^= 1; break;
|
case 's': gStopOnError ^= 1; break;
|
||||||
@@ -540,6 +564,8 @@ static void PrintUsage(void)
|
|||||||
vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
|
vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
|
||||||
"on)\n");
|
"on)\n");
|
||||||
vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
|
vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
|
||||||
|
vlog("\t\t-g\tToggle half precision testing. (Default: on if khr_fp_16 "
|
||||||
|
"on)\n");
|
||||||
vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
|
vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
|
||||||
vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
|
vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
|
||||||
"precision. (Default: on)\n");
|
"precision. (Default: on)\n");
|
||||||
@@ -640,6 +666,54 @@ test_status InitCL(cl_device_id device)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gFloatToHalfRoundingMode = kRoundToNearestEven;
|
||||||
|
if (is_extension_available(gDevice, "cl_khr_fp16"))
|
||||||
|
{
|
||||||
|
gHasHalf ^= 1;
|
||||||
|
#if defined(CL_DEVICE_HALF_FP_CONFIG)
|
||||||
|
if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_HALF_FP_CONFIG,
|
||||||
|
sizeof(gHalfCapabilities),
|
||||||
|
&gHalfCapabilities, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"ERROR: Unable to get device CL_DEVICE_HALF_FP_CONFIG. (%d)\n",
|
||||||
|
error);
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
if (HALF_REQUIRED_FEATURES_1
|
||||||
|
!= (gHalfCapabilities & HALF_REQUIRED_FEATURES_1)
|
||||||
|
&& HALF_REQUIRED_FEATURES_2
|
||||||
|
!= (gHalfCapabilities & HALF_REQUIRED_FEATURES_2))
|
||||||
|
{
|
||||||
|
char list[300] = "";
|
||||||
|
if (0 == (gHalfCapabilities & CL_FP_ROUND_TO_NEAREST))
|
||||||
|
strncat(list, "CL_FP_ROUND_TO_NEAREST, ", sizeof(list) - 1);
|
||||||
|
if (0 == (gHalfCapabilities & CL_FP_ROUND_TO_ZERO))
|
||||||
|
strncat(list, "CL_FP_ROUND_TO_ZERO, ", sizeof(list) - 1);
|
||||||
|
if (0 == (gHalfCapabilities & CL_FP_INF_NAN))
|
||||||
|
strncat(list, "CL_FP_INF_NAN, ", sizeof(list) - 1);
|
||||||
|
vlog_error("ERROR: required half features are missing: %s\n", list);
|
||||||
|
|
||||||
|
return TEST_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((gHalfCapabilities & CL_FP_ROUND_TO_NEAREST) != 0)
|
||||||
|
{
|
||||||
|
gHalfRoundingMode = CL_HALF_RTE;
|
||||||
|
}
|
||||||
|
else // due to above condition it must be RTZ
|
||||||
|
{
|
||||||
|
gHalfRoundingMode = CL_HALF_RTZ;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
vlog_error("FAIL: device says it supports cl_khr_fp16 but "
|
||||||
|
"CL_DEVICE_HALF_FP_CONFIG is not in the headers!\n");
|
||||||
|
return TEST_FAIL;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32_t deviceFrequency = 0;
|
uint32_t deviceFrequency = 0;
|
||||||
size_t configSize = sizeof(deviceFrequency);
|
size_t configSize = sizeof(deviceFrequency);
|
||||||
if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
|
if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
|
||||||
@@ -829,6 +903,7 @@ test_status InitCL(cl_device_id device)
|
|||||||
"Bruteforce_Ulp_Error_Double() for more details.\n\n");
|
"Bruteforce_Ulp_Error_Double() for more details.\n\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vlog("\tTesting half precision? %s\n", no_yes[0 != gHasHalf]);
|
||||||
vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
|
vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
|
||||||
if (gIsEmbedded)
|
if (gIsEmbedded)
|
||||||
vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
|
vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -4699,6 +4699,49 @@ double reference_nextafter(double xx, double yy)
|
|||||||
return a.f;
|
return a.f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cl_half reference_nanh(cl_ushort x)
|
||||||
|
{
|
||||||
|
cl_ushort u;
|
||||||
|
cl_half h;
|
||||||
|
u = x | 0x7e00U;
|
||||||
|
memcpy(&h, &u, sizeof(cl_half));
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
float reference_nextafterh(float xx, float yy, bool allow_denorms)
|
||||||
|
{
|
||||||
|
cl_half tmp_a = cl_half_from_float(xx, CL_HALF_RTE);
|
||||||
|
cl_half tmp_b = cl_half_from_float(yy, CL_HALF_RTE);
|
||||||
|
float x = cl_half_to_float(tmp_a);
|
||||||
|
float y = cl_half_to_float(tmp_b);
|
||||||
|
|
||||||
|
// take care of nans
|
||||||
|
if (x != x) return x;
|
||||||
|
|
||||||
|
if (y != y) return y;
|
||||||
|
|
||||||
|
if (x == y) return y;
|
||||||
|
|
||||||
|
short a_h = cl_half_from_float(x, CL_HALF_RTE);
|
||||||
|
short b_h = cl_half_from_float(y, CL_HALF_RTE);
|
||||||
|
short oa_h = a_h;
|
||||||
|
|
||||||
|
if (a_h & 0x8000) a_h = 0x8000 - a_h;
|
||||||
|
if (b_h & 0x8000) b_h = 0x8000 - b_h;
|
||||||
|
|
||||||
|
a_h += (a_h < b_h) ? 1 : -1;
|
||||||
|
a_h = (a_h < 0) ? (cl_short)0x8000 - a_h : a_h;
|
||||||
|
|
||||||
|
if (!allow_denorms && IsHalfSubnormal(a_h))
|
||||||
|
{
|
||||||
|
if (cl_half_to_float(0x7fff & oa_h) < cl_half_to_float(0x7fff & a_h))
|
||||||
|
a_h = (a_h & 0x8000) ? 0x8400 : 0x0400;
|
||||||
|
else
|
||||||
|
a_h = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return cl_half_to_float(a_h);
|
||||||
|
}
|
||||||
|
|
||||||
long double reference_nextafterl(long double xx, long double yy)
|
long double reference_nextafterl(long double xx, long double yy)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -20,6 +20,7 @@
|
|||||||
#include <OpenCL/opencl.h>
|
#include <OpenCL/opencl.h>
|
||||||
#else
|
#else
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
|
#include "CL/cl_half.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// -- for testing float --
|
// -- for testing float --
|
||||||
@@ -160,6 +161,8 @@ long double reference_fractl(long double, long double*);
|
|||||||
long double reference_fmal(long double, long double, long double);
|
long double reference_fmal(long double, long double, long double);
|
||||||
long double reference_madl(long double, long double, long double);
|
long double reference_madl(long double, long double, long double);
|
||||||
long double reference_nextafterl(long double, long double);
|
long double reference_nextafterl(long double, long double);
|
||||||
|
float reference_nextafterh(float, float, bool allow_denormals = true);
|
||||||
|
cl_half reference_nanh(cl_ushort);
|
||||||
long double reference_recipl(long double);
|
long double reference_recipl(long double);
|
||||||
long double reference_rootnl(long double, int);
|
long double reference_rootnl(long double, int);
|
||||||
long double reference_rsqrtl(long double);
|
long double reference_rsqrtl(long double);
|
||||||
|
|||||||
777
test_conformance/math_brute_force/ternary_half.cpp
Normal file
777
test_conformance/math_brute_force/ternary_half.cpp
Normal file
@@ -0,0 +1,777 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <cinttypes>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#define CORRECTLY_ROUNDED 0
|
||||||
|
#define FLUSHED 1
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetTernaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||||
|
ParameterType::Half, ParameterType::Half,
|
||||||
|
ParameterType::Half, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
// A table of more difficult cases to get right
|
||||||
|
static const cl_half specialValuesHalf[] = {
|
||||||
|
0xffff, 0x0000, 0x0001, 0x7c00, /*INFINITY*/
|
||||||
|
0xfc00, /*-INFINITY*/
|
||||||
|
0x8000, /*-0*/
|
||||||
|
0x7bff, /*HALF_MAX*/
|
||||||
|
0x0400, /*HALF_MIN*/
|
||||||
|
0x03ff, /* Largest denormal */
|
||||||
|
0x3c00, /* 1 */
|
||||||
|
0xbc00, /* -1 */
|
||||||
|
0x3555, /*nearest value to 1/3*/
|
||||||
|
0x3bff, /*largest number less than one*/
|
||||||
|
0xc000, /* -2 */
|
||||||
|
0xfbff, /* -HALF_MAX */
|
||||||
|
0x8400, /* -HALF_MIN */
|
||||||
|
0x4248, /* M_PI_H */
|
||||||
|
0xc248, /* -M_PI_H */
|
||||||
|
0xbbff, /* Largest negative fraction */
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
|
||||||
|
Programs programs;
|
||||||
|
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||||
|
KernelMatrix kernels;
|
||||||
|
float maxError = 0.0f;
|
||||||
|
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
float maxErrorVal = 0.0f;
|
||||||
|
float maxErrorVal2 = 0.0f;
|
||||||
|
float maxErrorVal3 = 0.0f;
|
||||||
|
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||||
|
|
||||||
|
constexpr size_t bufferElements = BUFFER_SIZE / sizeof(cl_half);
|
||||||
|
|
||||||
|
cl_uchar overflow[bufferElements];
|
||||||
|
float half_ulps = f->half_ulps;
|
||||||
|
int skipNanInf = (0 == strcmp("fma", f->nameInCode));
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
|
||||||
|
if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info)))
|
||||||
|
return error;
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
|
{
|
||||||
|
// Init input array
|
||||||
|
cl_half *hp0 = (cl_half *)gIn;
|
||||||
|
cl_half *hp1 = (cl_half *)gIn2;
|
||||||
|
cl_half *hp2 = (cl_half *)gIn3;
|
||||||
|
size_t idx = 0;
|
||||||
|
|
||||||
|
if (i == 0)
|
||||||
|
{ // test edge cases
|
||||||
|
uint32_t x, y, z;
|
||||||
|
x = y = z = 0;
|
||||||
|
for (; idx < bufferElements; idx++)
|
||||||
|
{
|
||||||
|
hp0[idx] = specialValuesHalf[x];
|
||||||
|
hp1[idx] = specialValuesHalf[y];
|
||||||
|
hp2[idx] = specialValuesHalf[z];
|
||||||
|
|
||||||
|
if (++x >= specialValuesHalfCount)
|
||||||
|
{
|
||||||
|
x = 0;
|
||||||
|
if (++y >= specialValuesHalfCount)
|
||||||
|
{
|
||||||
|
y = 0;
|
||||||
|
if (++z >= specialValuesHalfCount) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (idx == bufferElements)
|
||||||
|
vlog_error("Test Error: not all special cases tested!\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
auto any_value = [&d]() {
|
||||||
|
float t = (float)((double)genrand_int32(d) / (double)0xFFFFFFFF);
|
||||||
|
return HFF((1.0f - t) * CL_HALF_MIN + t * CL_HALF_MAX);
|
||||||
|
};
|
||||||
|
|
||||||
|
for (; idx < bufferElements; idx++)
|
||||||
|
{
|
||||||
|
hp0[idx] = any_value();
|
||||||
|
hp1[idx] = any_value();
|
||||||
|
hp2[idx] = any_value();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||||
|
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
|
||||||
|
BUFFER_SIZE, gIn3, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write garbage into output arrays
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||||
|
CL_FALSE, 0, BUFFER_SIZE,
|
||||||
|
gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||||
|
error, j);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||||
|
sizeof(pattern), 0, BUFFER_SIZE, 0,
|
||||||
|
NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the kernels
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
size_t vectorSize = sizeof(cl_half) * sizeValues[j];
|
||||||
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||||
|
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||||
|
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
|
||||||
|
sizeof(gInBuffer), &gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
|
||||||
|
sizeof(gInBuffer2), &gInBuffer2)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 3,
|
||||||
|
sizeof(gInBuffer3), &gInBuffer3)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||||
|
1, NULL, &localCount, NULL, 0,
|
||||||
|
NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(gQueue)))
|
||||||
|
{
|
||||||
|
vlog("clFlush failed\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
cl_half *res = (cl_half *)gOut_Ref;
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
{
|
||||||
|
feclearexcept(FE_OVERFLOW);
|
||||||
|
res[j] = HFD((double)f->dfunc.f_fff(HTF(hp0[j]), HTF(hp1[j]),
|
||||||
|
HTF(hp2[j])));
|
||||||
|
overflow[j] =
|
||||||
|
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
res[j] = HFD((double)f->dfunc.f_fff(HTF(hp0[j]), HTF(hp1[j]),
|
||||||
|
HTF(hp2[j])));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the data back
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error =
|
||||||
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
|
BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("ReadArray failed %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting) break;
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
uint16_t *t = (uint16_t *)gOut_Ref;
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
{
|
||||||
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
|
{
|
||||||
|
uint16_t *q = (uint16_t *)(gOut[k]);
|
||||||
|
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (t[j] != q[j])
|
||||||
|
{
|
||||||
|
int fail;
|
||||||
|
cl_half test = ((cl_half *)q)[j];
|
||||||
|
double ref1 = (double)f->dfunc.f_fff(
|
||||||
|
HTF(hp0[j]), HTF(hp1[j]), HTF(hp2[j]));
|
||||||
|
cl_half correct = HFD(ref1);
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result if an input
|
||||||
|
// or output is a infinity or NaN or overflow
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
if (overflow[j] || IsHalfInfinity(correct)
|
||||||
|
|| IsHalfNaN(correct) || IsHalfInfinity(hp0[j])
|
||||||
|
|| IsHalfNaN(hp0[j]) || IsHalfInfinity(hp1[j])
|
||||||
|
|| IsHalfNaN(hp1[j]) || IsHalfInfinity(hp2[j])
|
||||||
|
|| IsHalfNaN(hp2[j]))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
float err =
|
||||||
|
test != correct ? Ulp_Error_Half(test, ref1) : 0.f;
|
||||||
|
fail = !(fabsf(err) <= half_ulps);
|
||||||
|
|
||||||
|
if (fail && ftz)
|
||||||
|
{
|
||||||
|
// retry per section 6.5.3.2 with flushing on
|
||||||
|
if (0.0f == test
|
||||||
|
&& 0.0f
|
||||||
|
== f->func.f_fma(HTF(hp0[j]), HTF(hp1[j]),
|
||||||
|
HTF(hp2[j]), FLUSHED))
|
||||||
|
{
|
||||||
|
fail = 0;
|
||||||
|
err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// retry per section 6.5.3.3
|
||||||
|
if (fail && IsHalfSubnormal(hp0[j]))
|
||||||
|
{ // look at me,
|
||||||
|
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||||
|
|
||||||
|
float ref2 =
|
||||||
|
f->func.f_fma(0.0f, HTF(hp1[j]), HTF(hp2[j]),
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct2 = HFF(ref2);
|
||||||
|
float ref3 =
|
||||||
|
f->func.f_fma(-0.0f, HTF(hp1[j]), HTF(hp2[j]),
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct3 = HFF(ref3);
|
||||||
|
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||||
|
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsHalfInfinity(correct2)
|
||||||
|
|| IsHalfNaN(correct2)
|
||||||
|
|| IsHalfInfinity(correct3)
|
||||||
|
|| IsHalfNaN(correct3))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
float err2 = test != correct2
|
||||||
|
? Ulp_Error_Half(test, ref2)
|
||||||
|
: 0.f;
|
||||||
|
float err3 = test != correct3
|
||||||
|
? Ulp_Error_Half(test, ref3)
|
||||||
|
: 0.f;
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= half_ulps))
|
||||||
|
&& (!(fabsf(err3) <= half_ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (0.0f == test
|
||||||
|
&& (0.0f
|
||||||
|
== f->func.f_fma(0.0f, HTF(hp1[j]),
|
||||||
|
HTF(hp2[j]), FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(-0.0f, HTF(hp1[j]),
|
||||||
|
HTF(hp2[j]), FLUSHED)))
|
||||||
|
{
|
||||||
|
fail = 0;
|
||||||
|
err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// try with first two args as zero
|
||||||
|
if (IsHalfSubnormal(hp1[j]))
|
||||||
|
{ // its fun to have fun,
|
||||||
|
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||||
|
|
||||||
|
ref2 = f->func.f_fma(0.0f, 0.0f, HTF(hp2[j]),
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
correct2 = HFF(ref2);
|
||||||
|
ref3 = f->func.f_fma(-0.0f, 0.0f, HTF(hp2[j]),
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
correct3 = HFF(ref3);
|
||||||
|
float ref4 =
|
||||||
|
f->func.f_fma(0.0f, -0.0f, HTF(hp2[j]),
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct4 = HFF(ref4);
|
||||||
|
float ref5 =
|
||||||
|
f->func.f_fma(-0.0f, -0.0f, HTF(hp2[j]),
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct5 = HFF(ref5);
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result
|
||||||
|
// if an input or output is a infinity or NaN or
|
||||||
|
// overflow
|
||||||
|
if (!gInfNanSupport)
|
||||||
|
{
|
||||||
|
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||||
|
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsHalfInfinity(correct2)
|
||||||
|
|| IsHalfNaN(correct2)
|
||||||
|
|| IsHalfInfinity(correct3)
|
||||||
|
|| IsHalfNaN(correct3)
|
||||||
|
|| IsHalfInfinity(correct4)
|
||||||
|
|| IsHalfNaN(correct4)
|
||||||
|
|| IsHalfInfinity(correct5)
|
||||||
|
|| IsHalfNaN(correct5))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
err2 = test != correct2
|
||||||
|
? Ulp_Error_Half(test, ref2)
|
||||||
|
: 0.f;
|
||||||
|
err3 = test != correct3
|
||||||
|
? Ulp_Error_Half(test, ref3)
|
||||||
|
: 0.f;
|
||||||
|
float err4 = test != correct4
|
||||||
|
? Ulp_Error_Half(test, ref4)
|
||||||
|
: 0.f;
|
||||||
|
float err5 = test != correct5
|
||||||
|
? Ulp_Error_Half(test, ref5)
|
||||||
|
: 0.f;
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= half_ulps))
|
||||||
|
&& (!(fabsf(err3) <= half_ulps))
|
||||||
|
&& (!(fabsf(err4) <= half_ulps))
|
||||||
|
&& (!(fabsf(err5) <= half_ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||||
|
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (0.0f == test
|
||||||
|
&& (0.0f
|
||||||
|
== f->func.f_fma(0.0f, 0.0f,
|
||||||
|
HTF(hp2[j]),
|
||||||
|
FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(-0.0f, 0.0f,
|
||||||
|
HTF(hp2[j]),
|
||||||
|
FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(0.0f, -0.0f,
|
||||||
|
HTF(hp2[j]),
|
||||||
|
FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(-0.0f, -0.0f,
|
||||||
|
HTF(hp2[j]),
|
||||||
|
FLUSHED)))
|
||||||
|
{
|
||||||
|
fail = 0;
|
||||||
|
err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IsHalfSubnormal(hp2[j]))
|
||||||
|
{
|
||||||
|
if (test == 0.0f) // 0*0+0 is 0
|
||||||
|
{
|
||||||
|
fail = 0;
|
||||||
|
err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IsHalfSubnormal(hp2[j]))
|
||||||
|
{
|
||||||
|
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||||
|
|
||||||
|
ref2 = f->func.f_fma(0.0f, HTF(hp1[j]), 0.0f,
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
correct2 = HFF(ref2);
|
||||||
|
ref3 = f->func.f_fma(-0.0f, HTF(hp1[j]), 0.0f,
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
correct3 = HFF(ref3);
|
||||||
|
float ref4 =
|
||||||
|
f->func.f_fma(0.0f, HTF(hp1[j]), -0.0f,
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct4 = HFF(ref4);
|
||||||
|
float ref5 =
|
||||||
|
f->func.f_fma(-0.0f, HTF(hp1[j]), -0.0f,
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct5 = HFF(ref5);
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result
|
||||||
|
// if an input or output is a infinity or NaN or
|
||||||
|
// overflow
|
||||||
|
if (!gInfNanSupport)
|
||||||
|
{
|
||||||
|
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||||
|
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsHalfInfinity(correct2)
|
||||||
|
|| IsHalfNaN(correct2)
|
||||||
|
|| IsHalfInfinity(correct3)
|
||||||
|
|| IsHalfNaN(correct3)
|
||||||
|
|| IsHalfInfinity(correct4)
|
||||||
|
|| IsHalfNaN(correct4)
|
||||||
|
|| IsHalfInfinity(correct5)
|
||||||
|
|| IsHalfNaN(correct5))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
err2 = test != correct2
|
||||||
|
? Ulp_Error_Half(test, ref2)
|
||||||
|
: 0.f;
|
||||||
|
err3 = test != correct3
|
||||||
|
? Ulp_Error_Half(test, ref3)
|
||||||
|
: 0.f;
|
||||||
|
float err4 = test != correct4
|
||||||
|
? Ulp_Error_Half(test, ref4)
|
||||||
|
: 0.f;
|
||||||
|
float err5 = test != correct5
|
||||||
|
? Ulp_Error_Half(test, ref5)
|
||||||
|
: 0.f;
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= half_ulps))
|
||||||
|
&& (!(fabsf(err3) <= half_ulps))
|
||||||
|
&& (!(fabsf(err4) <= half_ulps))
|
||||||
|
&& (!(fabsf(err5) <= half_ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||||
|
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (0.0f == test
|
||||||
|
&& (0.0f
|
||||||
|
== f->func.f_fma(0.0f, HTF(hp1[j]),
|
||||||
|
0.0f, FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(-0.0f, HTF(hp1[j]),
|
||||||
|
0.0f, FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(0.0f, HTF(hp1[j]),
|
||||||
|
-0.0f, FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(-0.0f, HTF(hp1[j]),
|
||||||
|
-0.0f, FLUSHED)))
|
||||||
|
{
|
||||||
|
fail = 0;
|
||||||
|
err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (fail && IsHalfSubnormal(hp1[j]))
|
||||||
|
{
|
||||||
|
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||||
|
|
||||||
|
float ref2 =
|
||||||
|
f->func.f_fma(HTF(hp0[j]), 0.0f, HTF(hp2[j]),
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct2 = HFF(ref2);
|
||||||
|
float ref3 =
|
||||||
|
f->func.f_fma(HTF(hp0[j]), -0.0f, HTF(hp2[j]),
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct3 = HFF(ref3);
|
||||||
|
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||||
|
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsHalfInfinity(correct2)
|
||||||
|
|| IsHalfNaN(correct2)
|
||||||
|
|| IsHalfInfinity(correct3)
|
||||||
|
|| IsHalfNaN(correct3))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
float err2 = test != correct2
|
||||||
|
? Ulp_Error_Half(test, ref2)
|
||||||
|
: 0.f;
|
||||||
|
float err3 = test != correct3
|
||||||
|
? Ulp_Error_Half(test, ref3)
|
||||||
|
: 0.f;
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= half_ulps))
|
||||||
|
&& (!(fabsf(err3) <= half_ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (0.0f == test
|
||||||
|
&& (0.0f
|
||||||
|
== f->func.f_fma(HTF(hp0[j]), 0.0f,
|
||||||
|
HTF(hp2[j]), FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(HTF(hp0[j]), -0.0f,
|
||||||
|
HTF(hp2[j]), FLUSHED)))
|
||||||
|
{
|
||||||
|
fail = 0;
|
||||||
|
err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// try with second two args as zero
|
||||||
|
if (IsHalfSubnormal(hp2[j]))
|
||||||
|
{
|
||||||
|
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||||
|
|
||||||
|
ref2 = f->func.f_fma(HTF(hp0[j]), 0.0f, 0.0f,
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
correct2 = HFF(ref2);
|
||||||
|
ref3 = f->func.f_fma(HTF(hp0[j]), -0.0f, 0.0f,
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
correct3 = HFF(ref3);
|
||||||
|
float ref4 =
|
||||||
|
f->func.f_fma(HTF(hp0[j]), 0.0f, -0.0f,
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct4 = HFF(ref4);
|
||||||
|
float ref5 =
|
||||||
|
f->func.f_fma(HTF(hp0[j]), -0.0f, -0.0f,
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct5 = HFF(ref5);
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result
|
||||||
|
// if an input or output is a infinity or NaN or
|
||||||
|
// overflow
|
||||||
|
if (!gInfNanSupport)
|
||||||
|
{
|
||||||
|
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||||
|
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsHalfInfinity(correct2)
|
||||||
|
|| IsHalfNaN(correct2)
|
||||||
|
|| IsHalfInfinity(correct3)
|
||||||
|
|| IsHalfNaN(correct3)
|
||||||
|
|| IsHalfInfinity(correct4)
|
||||||
|
|| IsHalfNaN(correct4)
|
||||||
|
|| IsHalfInfinity(correct5)
|
||||||
|
|| IsHalfNaN(correct5))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
err2 = test != correct2
|
||||||
|
? Ulp_Error_Half(test, ref2)
|
||||||
|
: 0.f;
|
||||||
|
err3 = test != correct3
|
||||||
|
? Ulp_Error_Half(test, ref3)
|
||||||
|
: 0.f;
|
||||||
|
float err4 = test != correct4
|
||||||
|
? Ulp_Error_Half(test, ref4)
|
||||||
|
: 0.f;
|
||||||
|
float err5 = test != correct5
|
||||||
|
? Ulp_Error_Half(test, ref5)
|
||||||
|
: 0.f;
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= half_ulps))
|
||||||
|
&& (!(fabsf(err3) <= half_ulps))
|
||||||
|
&& (!(fabsf(err4) <= half_ulps))
|
||||||
|
&& (!(fabsf(err5) <= half_ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
if (fabsf(err4) < fabsf(err)) err = err4;
|
||||||
|
if (fabsf(err5) < fabsf(err)) err = err5;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (0.0f == test
|
||||||
|
&& (0.0f
|
||||||
|
== f->func.f_fma(HTF(hp0[j]), 0.0f,
|
||||||
|
0.0f, FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(HTF(hp0[j]), -0.0f,
|
||||||
|
0.0f, FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(HTF(hp0[j]), 0.0f,
|
||||||
|
-0.0f, FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(HTF(hp0[j]), -0.0f,
|
||||||
|
-0.0f, FLUSHED)))
|
||||||
|
{
|
||||||
|
fail = 0;
|
||||||
|
err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (fail && IsHalfSubnormal(hp2[j]))
|
||||||
|
{
|
||||||
|
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||||
|
|
||||||
|
float ref2 = f->func.f_fma(HTF(hp0[j]), HTF(hp1[j]),
|
||||||
|
0.0f, CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct2 = HFF(ref2);
|
||||||
|
float ref3 =
|
||||||
|
f->func.f_fma(HTF(hp0[j]), HTF(hp1[j]), -0.0f,
|
||||||
|
CORRECTLY_ROUNDED);
|
||||||
|
cl_half correct3 = HFF(ref3);
|
||||||
|
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||||
|
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsHalfInfinity(correct2)
|
||||||
|
|| IsHalfNaN(correct2)
|
||||||
|
|| IsHalfInfinity(correct3)
|
||||||
|
|| IsHalfNaN(correct3))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
float err2 = test != correct2
|
||||||
|
? Ulp_Error_Half(test, correct2)
|
||||||
|
: 0.f;
|
||||||
|
float err3 = test != correct3
|
||||||
|
? Ulp_Error_Half(test, correct3)
|
||||||
|
: 0.f;
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= half_ulps))
|
||||||
|
&& (!(fabsf(err3) <= half_ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (0.0f == test
|
||||||
|
&& (0.0f
|
||||||
|
== f->func.f_fma(HTF(hp0[j]),
|
||||||
|
HTF(hp1[j]), 0.0f,
|
||||||
|
FLUSHED)
|
||||||
|
|| 0.0f
|
||||||
|
== f->func.f_fma(HTF(hp0[j]),
|
||||||
|
HTF(hp1[j]), -0.0f,
|
||||||
|
FLUSHED)))
|
||||||
|
{
|
||||||
|
fail = 0;
|
||||||
|
err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fabsf(err) > maxError)
|
||||||
|
{
|
||||||
|
maxError = fabsf(err);
|
||||||
|
maxErrorVal = HTF(hp0[j]);
|
||||||
|
maxErrorVal2 = HTF(hp1[j]);
|
||||||
|
maxErrorVal3 = HTF(hp2[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\nERROR: %s%s: %f ulp error at {%a, %a, %a} "
|
||||||
|
"({0x%4.4x, 0x%4.4x, 0x%4.4x}): *%a vs. %a\n",
|
||||||
|
f->name, sizeNames[k], err, HTF(hp0[j]),
|
||||||
|
HTF(hp1[j]), HTF(hp2[j]), hp0[j], hp1[j], hp2[j],
|
||||||
|
HTF(res[j]), HTF(test));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == (i & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n",
|
||||||
|
i, step, BUFFER_SIZE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
|
||||||
|
vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
|
||||||
|
maxErrorVal3);
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2021 The Khronos Group Inc.
|
// Copyright (c) 2021-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -24,6 +24,9 @@ int TestFunc_Float_Float(const Func *f, MTdata, bool relaxedMode);
|
|||||||
// double foo(double)
|
// double foo(double)
|
||||||
int TestFunc_Double_Double(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_Double_Double(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
|
// half foo(half)
|
||||||
|
int TestFunc_Half_Half(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// int foo(float)
|
// int foo(float)
|
||||||
int TestFunc_Int_Float(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_Int_Float(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
@@ -36,6 +39,9 @@ int TestFunc_Float_UInt(const Func *f, MTdata, bool relaxedMode);
|
|||||||
// double foo(ulong)
|
// double foo(ulong)
|
||||||
int TestFunc_Double_ULong(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_Double_ULong(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
|
// half (Ushort)
|
||||||
|
int TestFunc_Half_UShort(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// Returns {0, 1} for scalar and {0, -1} for vector.
|
// Returns {0, 1} for scalar and {0, -1} for vector.
|
||||||
// int foo(float)
|
// int foo(float)
|
||||||
int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode);
|
int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode);
|
||||||
@@ -44,21 +50,34 @@ int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode);
|
|||||||
// int foo(double)
|
// int foo(double)
|
||||||
int TestMacro_Int_Double(const Func *f, MTdata, bool relaxedMode);
|
int TestMacro_Int_Double(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
|
// int foo(half,half)
|
||||||
|
int TestMacro_Int_Half_Half(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
|
// int foo(half)
|
||||||
|
int TestMacro_Int_Half(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
|
// int foo(half)
|
||||||
|
int TestFunc_Int_Half(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// float foo(float, float)
|
// float foo(float, float)
|
||||||
int TestFunc_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// double foo(double, double)
|
// double foo(double, double)
|
||||||
int TestFunc_Double_Double_Double(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_Double_Double_Double(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
|
// Half foo(half, half)
|
||||||
|
int TestFunc_Half_Half_Half(const Func *f, MTdata, bool relaxedMode);
|
||||||
// Special handling for nextafter.
|
// Special handling for nextafter.
|
||||||
// float foo(float, float)
|
// Half foo(Half, Half)
|
||||||
int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata,
|
int TestFunc_Half_Half_Half_nextafter(const Func *f, MTdata, bool relaxedMode);
|
||||||
bool relaxedMode);
|
|
||||||
|
// Half foo(Half, Half)
|
||||||
|
int TestFunc_Half_Half_Half_common(const Func *f, MTdata, int isNextafter,
|
||||||
|
bool relaxedMode);
|
||||||
|
|
||||||
|
// Half foo(Half, int)
|
||||||
|
int TestFunc_Half_Half_Int(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// Special handling for nextafter.
|
|
||||||
// double foo(double, double)
|
|
||||||
int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata,
|
|
||||||
bool relaxedMode);
|
|
||||||
|
|
||||||
// float op float
|
// float op float
|
||||||
int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata,
|
int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata,
|
||||||
@@ -68,6 +87,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata,
|
|||||||
int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata,
|
int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata,
|
||||||
bool relaxedMode);
|
bool relaxedMode);
|
||||||
|
|
||||||
|
// half op half
|
||||||
|
int TestFunc_Half_Half_Half_Operator(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// float foo(float, int)
|
// float foo(float, int)
|
||||||
int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
@@ -89,24 +111,36 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
|
|||||||
int TestFunc_Double_Double_Double_Double(const Func *f, MTdata,
|
int TestFunc_Double_Double_Double_Double(const Func *f, MTdata,
|
||||||
bool relaxedMode);
|
bool relaxedMode);
|
||||||
|
|
||||||
|
// half foo(half, half, half)
|
||||||
|
int TestFunc_Half_Half_Half_Half(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// float foo(float, float*)
|
// float foo(float, float*)
|
||||||
int TestFunc_Float2_Float(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_Float2_Float(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// double foo(double, double*)
|
// double foo(double, double*)
|
||||||
int TestFunc_Double2_Double(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_Double2_Double(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
|
// half foo(half, half*)
|
||||||
|
int TestFunc_Half2_Half(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// float foo(float, int*)
|
// float foo(float, int*)
|
||||||
int TestFunc_FloatI_Float(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_FloatI_Float(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// double foo(double, int*)
|
// double foo(double, int*)
|
||||||
int TestFunc_DoubleI_Double(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_DoubleI_Double(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
|
// half foo(half, int*)
|
||||||
|
int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode);
|
||||||
|
|
||||||
// float foo(float, float, int*)
|
// float foo(float, float, int*)
|
||||||
int TestFunc_FloatI_Float_Float(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_FloatI_Float_Float(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
// double foo(double, double, int*)
|
// double foo(double, double, int*)
|
||||||
int TestFunc_DoubleI_Double_Double(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_DoubleI_Double_Double(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
|
// half foo(half, half, int*)
|
||||||
|
int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode);
|
||||||
|
|
||||||
// Special handling for mad.
|
// Special handling for mad.
|
||||||
// float mad(float, float, float)
|
// float mad(float, float, float)
|
||||||
int TestFunc_mad_Float(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_mad_Float(const Func *f, MTdata, bool relaxedMode);
|
||||||
@@ -115,4 +149,7 @@ int TestFunc_mad_Float(const Func *f, MTdata, bool relaxedMode);
|
|||||||
// double mad(double, double, double)
|
// double mad(double, double, double)
|
||||||
int TestFunc_mad_Double(const Func *f, MTdata, bool relaxedMode);
|
int TestFunc_mad_Double(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
|
// half mad(half, half, half)
|
||||||
|
int TestFunc_mad_Half(const Func *f, MTdata, bool relaxedMode);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
483
test_conformance/math_brute_force/unary_half.cpp
Normal file
483
test_conformance/math_brute_force/unary_half.cpp
Normal file
@@ -0,0 +1,483 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||||
|
ParameterType::Half, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Thread specific data for a worker thread
|
||||||
|
typedef struct ThreadInfo
|
||||||
|
{
|
||||||
|
clMemWrapper inBuf; // input buffer for the thread
|
||||||
|
clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
|
||||||
|
float maxError; // max error value. Init to 0.
|
||||||
|
double maxErrorValue; // position of the max error value. Init to 0.
|
||||||
|
clCommandQueueWrapper
|
||||||
|
tQueue; // per thread command queue to improve performance
|
||||||
|
} ThreadInfo;
|
||||||
|
|
||||||
|
struct TestInfoBase
|
||||||
|
{
|
||||||
|
size_t subBufferSize; // Size of the sub-buffer in elements
|
||||||
|
const Func *f; // A pointer to the function info
|
||||||
|
cl_uint threadCount; // Number of worker threads
|
||||||
|
cl_uint jobCount; // Number of jobs
|
||||||
|
cl_uint step; // step between each chunk and the next.
|
||||||
|
cl_uint scale; // stride between individual test values
|
||||||
|
float ulps; // max_allowed ulps
|
||||||
|
int ftz; // non-zero if running in flush to zero mode
|
||||||
|
|
||||||
|
int isRangeLimited; // 1 if the function is only to be evaluated over a
|
||||||
|
// range
|
||||||
|
float half_sin_cos_tan_limit;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TestInfo : public TestInfoBase
|
||||||
|
{
|
||||||
|
TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
|
||||||
|
|
||||||
|
// Array of thread specific information
|
||||||
|
std::vector<ThreadInfo> tinfo;
|
||||||
|
|
||||||
|
// Programs for various vector sizes.
|
||||||
|
Programs programs;
|
||||||
|
|
||||||
|
// Thread-specific kernels for each vector size:
|
||||||
|
// k[vector_size][thread_id]
|
||||||
|
KernelMatrix k;
|
||||||
|
};
|
||||||
|
|
||||||
|
cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
|
||||||
|
{
|
||||||
|
TestInfo *job = (TestInfo *)data;
|
||||||
|
size_t buffer_elements = job->subBufferSize;
|
||||||
|
size_t buffer_size = buffer_elements * sizeof(cl_half);
|
||||||
|
cl_uint scale = job->scale;
|
||||||
|
cl_uint base = job_id * (cl_uint)job->step;
|
||||||
|
ThreadInfo *tinfo = &(job->tinfo[thread_id]);
|
||||||
|
float ulps = job->ulps;
|
||||||
|
fptr func = job->f->func;
|
||||||
|
cl_uint j, k;
|
||||||
|
cl_int error = CL_SUCCESS;
|
||||||
|
|
||||||
|
int isRangeLimited = job->isRangeLimited;
|
||||||
|
float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit;
|
||||||
|
int ftz = job->ftz;
|
||||||
|
|
||||||
|
std::vector<float> s(0);
|
||||||
|
|
||||||
|
cl_event e[VECTOR_SIZE_COUNT];
|
||||||
|
cl_ushort *out[VECTOR_SIZE_COUNT];
|
||||||
|
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// start the map of the output arrays
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
out[j] = (uint16_t *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
|
||||||
|
buffer_size, 0, NULL, e + j, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the new values to the input array
|
||||||
|
cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
p[j] = base + j * scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
|
||||||
|
buffer_size, p, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
// Wait for the map to finish
|
||||||
|
if ((error = clWaitForEvents(1, e + j)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clReleaseEvent(e[j])))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill the result buffer with garbage, so that old results don't carry
|
||||||
|
// over
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(out[j], &pattern, buffer_size);
|
||||||
|
error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueUnmapMemObject failed!\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
&pattern, sizeof(pattern), 0,
|
||||||
|
buffer_size, 0, NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// run the kernel
|
||||||
|
size_t vectorCount =
|
||||||
|
(buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||||
|
cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
|
||||||
|
// own copy of the cl_kernel
|
||||||
|
cl_program program = job->programs[j];
|
||||||
|
|
||||||
|
if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
|
||||||
|
&tinfo->outBuf[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
|
||||||
|
&tinfo->inBuf)))
|
||||||
|
{
|
||||||
|
LogBuildError(program);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
|
||||||
|
&vectorCount, NULL, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting) return CL_SUCCESS;
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
cl_half *r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
|
||||||
|
s.resize(buffer_elements);
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
s[j] = (float)cl_half_to_float(p[j]);
|
||||||
|
r[j] = HFF(func.f_f(s[j]));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the data back -- no need to wait for the first N-1 buffers. This is
|
||||||
|
// an in order queue.
|
||||||
|
for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
out[j] = (uint16_t *)clEnqueueMapBuffer(
|
||||||
|
tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
|
||||||
|
buffer_size, 0, NULL, NULL, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
|
||||||
|
error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Wait for the last buffer
|
||||||
|
out[j] = (uint16_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
CL_TRUE, CL_MAP_READ, 0,
|
||||||
|
buffer_size, 0, NULL, NULL, &error);
|
||||||
|
if (error || NULL == out[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
for (j = 0; j < buffer_elements; j++)
|
||||||
|
{
|
||||||
|
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
|
{
|
||||||
|
cl_ushort *q = out[k];
|
||||||
|
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (r[j] != q[j])
|
||||||
|
{
|
||||||
|
float test = cl_half_to_float(q[j]);
|
||||||
|
double correct = func.f_f(s[j]);
|
||||||
|
float err = Ulp_Error_Half(q[j], correct);
|
||||||
|
int fail = !(fabsf(err) <= ulps);
|
||||||
|
|
||||||
|
// half_sin/cos/tan are only valid between +-2**16, Inf, NaN
|
||||||
|
if (isRangeLimited
|
||||||
|
&& fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16)
|
||||||
|
&& fabsf(s[j]) < INFINITY)
|
||||||
|
{
|
||||||
|
if (fabsf(test) <= half_sin_cos_tan_limit)
|
||||||
|
{
|
||||||
|
err = 0;
|
||||||
|
fail = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
if (ftz)
|
||||||
|
{
|
||||||
|
// retry per section 6.5.3.2
|
||||||
|
if (IsHalfResultSubnormal(correct, ulps))
|
||||||
|
{
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// retry per section 6.5.3.3
|
||||||
|
if (IsHalfSubnormal(p[j]))
|
||||||
|
{
|
||||||
|
double correct2 = func.f_f(0.0);
|
||||||
|
double correct3 = func.f_f(-0.0);
|
||||||
|
float err2 = Ulp_Error_Half(q[j], correct2);
|
||||||
|
float err3 = Ulp_Error_Half(q[j], correct3);
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(err2) <= ulps))
|
||||||
|
&& (!(fabsf(err3) <= ulps)));
|
||||||
|
if (fabsf(err2) < fabsf(err)) err = err2;
|
||||||
|
if (fabsf(err3) < fabsf(err)) err = err3;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(correct2, ulps)
|
||||||
|
|| IsHalfResultSubnormal(correct3, ulps))
|
||||||
|
{
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fabsf(err) > tinfo->maxError)
|
||||||
|
{
|
||||||
|
tinfo->maxError = fabsf(err);
|
||||||
|
tinfo->maxErrorValue = s[j];
|
||||||
|
}
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
vlog_error("\nERROR: %s%s: %f ulp error at %a "
|
||||||
|
"(half 0x%04x)\nExpected: %a (half 0x%04x) "
|
||||||
|
"\nActual: %a (half 0x%04x)\n",
|
||||||
|
job->f->name, sizeNames[k], err, s[j], p[j],
|
||||||
|
cl_half_to_float(r[j]), r[j], test, q[j]);
|
||||||
|
error = -1;
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
|
||||||
|
out[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
|
||||||
|
j, error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
|
||||||
|
|
||||||
|
|
||||||
|
if (0 == (base & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
|
||||||
|
"ThreadCount:%2u\n",
|
||||||
|
base, job->step, job->scale, buffer_elements, job->ulps,
|
||||||
|
job->threadCount);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
TestInfoBase test_info_base;
|
||||||
|
cl_int error;
|
||||||
|
size_t i, j;
|
||||||
|
float maxError = 0.0f;
|
||||||
|
double maxErrorVal = 0.0;
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
|
||||||
|
// Init test_info
|
||||||
|
memset(&test_info_base, 0, sizeof(test_info_base));
|
||||||
|
TestInfo test_info(test_info_base);
|
||||||
|
|
||||||
|
test_info.threadCount = GetThreadCount();
|
||||||
|
|
||||||
|
test_info.subBufferSize = BUFFER_SIZE
|
||||||
|
/ (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
|
||||||
|
test_info.scale = getTestScale(sizeof(cl_half));
|
||||||
|
test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
|
||||||
|
if (test_info.step / test_info.subBufferSize != test_info.scale)
|
||||||
|
{
|
||||||
|
// there was overflow
|
||||||
|
test_info.jobCount = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
test_info.jobCount =
|
||||||
|
std::max((cl_uint)1,
|
||||||
|
(cl_uint)((1ULL << sizeof(cl_half) * 8) / test_info.step));
|
||||||
|
}
|
||||||
|
|
||||||
|
test_info.f = f;
|
||||||
|
test_info.ulps = f->half_ulps;
|
||||||
|
test_info.ftz =
|
||||||
|
f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
|
||||||
|
test_info.tinfo.resize(test_info.threadCount);
|
||||||
|
|
||||||
|
for (i = 0; i < test_info.threadCount; i++)
|
||||||
|
{
|
||||||
|
cl_buffer_region region = { i * test_info.subBufferSize
|
||||||
|
* sizeof(cl_half),
|
||||||
|
test_info.subBufferSize * sizeof(cl_half) };
|
||||||
|
test_info.tinfo[i].inBuf =
|
||||||
|
clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
|
||||||
|
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].inBuf)
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
|
||||||
|
"region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
|
||||||
|
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
|
||||||
|
®ion, &error);
|
||||||
|
if (error || NULL == test_info.tinfo[i].outBuf[j])
|
||||||
|
{
|
||||||
|
vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
|
||||||
|
"for region {%zd, %zd}\n",
|
||||||
|
region.origin, region.size);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
test_info.tinfo[i].tQueue =
|
||||||
|
clCreateCommandQueue(gContext, gDevice, 0, &error);
|
||||||
|
if (NULL == test_info.tinfo[i].tQueue || error)
|
||||||
|
{
|
||||||
|
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for special cases for unary float
|
||||||
|
test_info.isRangeLimited = 0;
|
||||||
|
test_info.half_sin_cos_tan_limit = 0;
|
||||||
|
if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
|
||||||
|
{
|
||||||
|
test_info.isRangeLimited = 1;
|
||||||
|
test_info.half_sin_cos_tan_limit = 1.0f
|
||||||
|
+ test_info.ulps
|
||||||
|
* (FLT_EPSILON / 2.0f); // out of range results from finite
|
||||||
|
// inputs must be in [-1,1]
|
||||||
|
}
|
||||||
|
else if (0 == strcmp(f->name, "half_tan"))
|
||||||
|
{
|
||||||
|
test_info.isRangeLimited = 1;
|
||||||
|
test_info.half_sin_cos_tan_limit =
|
||||||
|
INFINITY; // out of range resut from finite inputs must be numeric
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
{
|
||||||
|
BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
|
||||||
|
test_info.programs, f->nameInCode };
|
||||||
|
error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info);
|
||||||
|
test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
|
||||||
|
|
||||||
|
// Accumulate the arithmetic errors
|
||||||
|
for (i = 0; i < test_info.threadCount; i++)
|
||||||
|
{
|
||||||
|
if (test_info.tinfo[i].maxError > maxError)
|
||||||
|
{
|
||||||
|
maxError = test_info.tinfo[i].maxError;
|
||||||
|
maxErrorVal = test_info.tinfo[i].maxErrorValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test_error(error, "ThreadPool_Do: TestHalf failed\n");
|
||||||
|
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting) vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
452
test_conformance/math_brute_force/unary_two_results_half.cpp
Normal file
452
test_conformance/math_brute_force/unary_two_results_half.cpp
Normal file
@@ -0,0 +1,452 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <cinttypes>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||||
|
ParameterType::Half, ParameterType::Half,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
Programs programs;
|
||||||
|
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||||
|
KernelMatrix kernels;
|
||||||
|
float maxError0 = 0.0f;
|
||||||
|
float maxError1 = 0.0f;
|
||||||
|
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
float maxErrorVal0 = 0.0f;
|
||||||
|
float maxErrorVal1 = 0.0f;
|
||||||
|
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||||
|
|
||||||
|
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_half),
|
||||||
|
size_t(1ULL << (sizeof(cl_half) * 8)));
|
||||||
|
size_t bufferSize = bufferElements * sizeof(cl_half);
|
||||||
|
|
||||||
|
std::vector<cl_uchar> overflow(bufferElements);
|
||||||
|
int isFract = 0 == strcmp("fract", f->nameInCode);
|
||||||
|
int skipNanInf = isFract;
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
|
||||||
|
float half_ulps = f->half_ulps;
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
|
||||||
|
if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info)))
|
||||||
|
return error;
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < (1ULL << 16); i += step)
|
||||||
|
{
|
||||||
|
// Init input array
|
||||||
|
cl_half *pIn = (cl_half *)gIn;
|
||||||
|
for (size_t j = 0; j < bufferElements; j++) pIn[j] = (cl_ushort)i + j;
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
bufferSize, gIn, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write garbage into output arrays
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(gOut[j], &pattern, bufferSize);
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||||
|
CL_FALSE, 0, bufferSize,
|
||||||
|
gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||||
|
error, j);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset_pattern4(gOut2[j], &pattern, bufferSize);
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
|
||||||
|
CL_FALSE, 0, bufferSize,
|
||||||
|
gOut2[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
|
||||||
|
error, j);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||||
|
sizeof(pattern), 0, bufferSize, 0,
|
||||||
|
NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer 1 failed!\n");
|
||||||
|
|
||||||
|
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||||
|
sizeof(pattern), 0, bufferSize, 0,
|
||||||
|
NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer 2 failed!\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the kernels
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
size_t vectorSize = sizeValues[j] * sizeof(cl_half);
|
||||||
|
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||||
|
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error =
|
||||||
|
clSetKernelArg(kernels[j][thread_id], 1,
|
||||||
|
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
|
||||||
|
sizeof(gInBuffer), &gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||||
|
1, NULL, &localCount, NULL, 0,
|
||||||
|
NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(gQueue)))
|
||||||
|
{
|
||||||
|
vlog_error("clFlush failed\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
FPU_mode_type oldMode;
|
||||||
|
RoundingMode oldRoundMode = kRoundToNearestEven;
|
||||||
|
if (isFract)
|
||||||
|
{
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
memset(&oldMode, 0, sizeof(oldMode));
|
||||||
|
if (ftz) ForceFTZ(&oldMode);
|
||||||
|
|
||||||
|
// Set the rounding mode to match the device
|
||||||
|
if (gIsInRTZMode)
|
||||||
|
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
cl_half *ref1 = (cl_half *)gOut_Ref;
|
||||||
|
cl_half *ref2 = (cl_half *)gOut_Ref2;
|
||||||
|
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
{
|
||||||
|
double dd;
|
||||||
|
feclearexcept(FE_OVERFLOW);
|
||||||
|
|
||||||
|
ref1[j] = HFF((float)f->func.f_fpf(HTF(pIn[j]), &dd));
|
||||||
|
ref2[j] = HFF((float)dd);
|
||||||
|
|
||||||
|
// ensure correct rounding of fract result is not reaching 1
|
||||||
|
if (isFract && HTF(ref1[j]) >= 1.f) ref1[j] = 0x3bff;
|
||||||
|
|
||||||
|
overflow[j] =
|
||||||
|
FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
{
|
||||||
|
double dd;
|
||||||
|
ref1[j] = HFF((float)f->func.f_fpf(HTF(pIn[j]), &dd));
|
||||||
|
ref2[j] = HFF((float)dd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isFract && ftz) RestoreFPState(&oldMode);
|
||||||
|
|
||||||
|
// Read the data back
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error =
|
||||||
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
|
bufferSize, gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("ReadArray failed %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error =
|
||||||
|
clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
|
||||||
|
bufferSize, gOut2[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("ReadArray2 failed %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
{
|
||||||
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
|
{
|
||||||
|
cl_half *test1 = (cl_half *)gOut[k];
|
||||||
|
cl_half *test2 = (cl_half *)gOut2[k];
|
||||||
|
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (ref1[j] != test1[j] || ref2[j] != test2[j])
|
||||||
|
{
|
||||||
|
double fp_correct1 = 0, fp_correct2 = 0;
|
||||||
|
float err = 0, err2 = 0;
|
||||||
|
|
||||||
|
fp_correct1 = f->func.f_fpf(HTF(pIn[j]), &fp_correct2);
|
||||||
|
|
||||||
|
cl_half correct1 = HFF(fp_correct1);
|
||||||
|
cl_half correct2 = HFF(fp_correct2);
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result if an input
|
||||||
|
// or output is a infinity or NaN or overflow
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
if (skipNanInf && overflow[j]) continue;
|
||||||
|
// Note: no double rounding here. Reference functions
|
||||||
|
// calculate in single precision.
|
||||||
|
if (IsHalfInfinity(correct1) || IsHalfNaN(correct1)
|
||||||
|
|| IsHalfInfinity(correct2) || IsHalfNaN(correct2)
|
||||||
|
|| IsHalfInfinity(pIn[j]) || IsHalfNaN(pIn[j]))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = Ulp_Error_Half(test1[j], fp_correct1);
|
||||||
|
err2 = Ulp_Error_Half(test2[j], fp_correct2);
|
||||||
|
|
||||||
|
int fail =
|
||||||
|
!(fabsf(err) <= half_ulps && fabsf(err2) <= half_ulps);
|
||||||
|
|
||||||
|
if (ftz)
|
||||||
|
{
|
||||||
|
// retry per section 6.5.3.2
|
||||||
|
if (IsHalfResultSubnormal(fp_correct1, half_ulps))
|
||||||
|
{
|
||||||
|
if (IsHalfResultSubnormal(fp_correct2, half_ulps))
|
||||||
|
{
|
||||||
|
fail = fail
|
||||||
|
&& !(HTF(test1[j]) == 0.0f
|
||||||
|
&& HTF(test2[j]) == 0.0f);
|
||||||
|
if (!fail)
|
||||||
|
{
|
||||||
|
err = 0.0f;
|
||||||
|
err2 = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fail = fail
|
||||||
|
&& !(HTF(test1[j]) == 0.0f
|
||||||
|
&& fabsf(err2) <= half_ulps);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IsHalfResultSubnormal(fp_correct2, half_ulps))
|
||||||
|
{
|
||||||
|
fail = fail
|
||||||
|
&& !(HTF(test2[j]) == 0.0f
|
||||||
|
&& fabsf(err) <= half_ulps);
|
||||||
|
if (!fail) err2 = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// retry per section 6.5.3.3
|
||||||
|
if (IsHalfSubnormal(pIn[j]))
|
||||||
|
{
|
||||||
|
double fp_correctp, fp_correctn;
|
||||||
|
double fp_correct2p, fp_correct2n;
|
||||||
|
float errp, err2p, errn, err2n;
|
||||||
|
|
||||||
|
if (skipNanInf) feclearexcept(FE_OVERFLOW);
|
||||||
|
fp_correctp = f->func.f_fpf(0.0, &fp_correct2p);
|
||||||
|
fp_correctn = f->func.f_fpf(-0.0, &fp_correct2n);
|
||||||
|
|
||||||
|
cl_half correctp = HFF(fp_correctp);
|
||||||
|
cl_half correctn = HFF(fp_correctn);
|
||||||
|
cl_half correct2p = HFF(fp_correct2p);
|
||||||
|
cl_half correct2n = HFF(fp_correct2n);
|
||||||
|
|
||||||
|
// Per section 10 paragraph 6, accept any result if
|
||||||
|
// an input or output is a infinity or NaN or
|
||||||
|
// overflow
|
||||||
|
if (skipNanInf)
|
||||||
|
{
|
||||||
|
if (fetestexcept(FE_OVERFLOW)) continue;
|
||||||
|
|
||||||
|
// Note: no double rounding here. Reference
|
||||||
|
// functions calculate in single precision.
|
||||||
|
if (IsHalfInfinity(correctp)
|
||||||
|
|| IsHalfNaN(correctp)
|
||||||
|
|| IsHalfInfinity(correctn)
|
||||||
|
|| IsHalfNaN(correctn)
|
||||||
|
|| IsHalfInfinity(correct2p)
|
||||||
|
|| IsHalfNaN(correct2p)
|
||||||
|
|| IsHalfInfinity(correct2n)
|
||||||
|
|| IsHalfNaN(correct2n))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
errp = Ulp_Error_Half(test1[j], fp_correctp);
|
||||||
|
err2p = Ulp_Error_Half(test1[j], fp_correct2p);
|
||||||
|
errn = Ulp_Error_Half(test1[j], fp_correctn);
|
||||||
|
err2n = Ulp_Error_Half(test1[j], fp_correct2n);
|
||||||
|
|
||||||
|
fail = fail
|
||||||
|
&& ((!(fabsf(errp) <= half_ulps))
|
||||||
|
&& (!(fabsf(err2p) <= half_ulps))
|
||||||
|
&& ((!(fabsf(errn) <= half_ulps))
|
||||||
|
&& (!(fabsf(err2n) <= half_ulps))));
|
||||||
|
if (fabsf(errp) < fabsf(err)) err = errp;
|
||||||
|
if (fabsf(errn) < fabsf(err)) err = errn;
|
||||||
|
if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
|
||||||
|
if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (IsHalfResultSubnormal(fp_correctp, half_ulps)
|
||||||
|
|| IsHalfResultSubnormal(fp_correctn,
|
||||||
|
half_ulps))
|
||||||
|
{
|
||||||
|
if (IsHalfResultSubnormal(fp_correct2p,
|
||||||
|
half_ulps)
|
||||||
|
|| IsHalfResultSubnormal(fp_correct2n,
|
||||||
|
half_ulps))
|
||||||
|
{
|
||||||
|
fail = fail
|
||||||
|
&& !(HTF(test1[j]) == 0.0f
|
||||||
|
&& HTF(test2[j]) == 0.0f);
|
||||||
|
if (!fail) err = err2 = 0.0f;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fail = fail
|
||||||
|
&& !(HTF(test1[j]) == 0.0f
|
||||||
|
&& fabsf(err2) <= half_ulps);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IsHalfResultSubnormal(fp_correct2p,
|
||||||
|
half_ulps)
|
||||||
|
|| IsHalfResultSubnormal(fp_correct2n,
|
||||||
|
half_ulps))
|
||||||
|
{
|
||||||
|
fail = fail
|
||||||
|
&& !(HTF(test2[j]) == 0.0f
|
||||||
|
&& (fabsf(err) <= half_ulps));
|
||||||
|
if (!fail) err2 = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fabsf(err) > maxError0)
|
||||||
|
{
|
||||||
|
maxError0 = fabsf(err);
|
||||||
|
maxErrorVal0 = HTF(pIn[j]);
|
||||||
|
}
|
||||||
|
if (fabsf(err2) > maxError1)
|
||||||
|
{
|
||||||
|
maxError1 = fabsf(err2);
|
||||||
|
maxErrorVal1 = HTF(pIn[j]);
|
||||||
|
}
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
vlog_error("\nERROR: %s%s: {%f, %f} ulp error at %a: "
|
||||||
|
"*{%a, %a} vs. {%a, %a}\n",
|
||||||
|
f->name, sizeNames[k], err, err2,
|
||||||
|
HTF(pIn[j]), HTF(ref1[j]), HTF(ref2[j]),
|
||||||
|
HTF(test1[j]), HTF(test2[j]));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
|
||||||
|
|
||||||
|
if (0 == (i & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14" PRIu64 " step:%10" PRIu64
|
||||||
|
" bufferSize:%10zu \n",
|
||||||
|
i, step, bufferSize);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
|
||||||
|
vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
|
||||||
|
maxErrorVal1);
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
347
test_conformance/math_brute_force/unary_two_results_i_half.cpp
Normal file
347
test_conformance/math_brute_force/unary_two_results_i_half.cpp
Normal file
@@ -0,0 +1,347 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
|
||||||
|
#include <cinttypes>
|
||||||
|
#include <climits>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||||
|
ParameterType::Int, ParameterType::Half,
|
||||||
|
vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_ulong abs_cl_long(cl_long i)
|
||||||
|
{
|
||||||
|
cl_long mask = i >> 63;
|
||||||
|
return (i ^ mask) - mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
Programs programs;
|
||||||
|
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||||
|
KernelMatrix kernels;
|
||||||
|
float maxError = 0.0f;
|
||||||
|
int64_t maxError2 = 0;
|
||||||
|
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
float maxErrorVal = 0.0f;
|
||||||
|
float maxErrorVal2 = 0.0f;
|
||||||
|
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||||
|
|
||||||
|
// sizeof(cl_half) < sizeof (int32_t)
|
||||||
|
// to prevent overflowing gOut_Ref2 it is necessary to use
|
||||||
|
// bigger type as denominator for buffer size calculation
|
||||||
|
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_int),
|
||||||
|
size_t(1ULL << (sizeof(cl_half) * 8)));
|
||||||
|
|
||||||
|
size_t bufferSizeLo = bufferElements * sizeof(cl_half);
|
||||||
|
size_t bufferSizeHi = bufferElements * sizeof(cl_int);
|
||||||
|
|
||||||
|
cl_ulong maxiError = 0;
|
||||||
|
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
|
||||||
|
float half_ulps = f->half_ulps;
|
||||||
|
|
||||||
|
maxiError = half_ulps == INFINITY ? CL_ULONG_MAX : 0;
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
|
||||||
|
if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info)))
|
||||||
|
return error;
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < (1ULL << 16); i += step)
|
||||||
|
{
|
||||||
|
// Init input array
|
||||||
|
cl_half *pIn = (cl_half *)gIn;
|
||||||
|
for (size_t j = 0; j < bufferElements; j++) pIn[j] = (cl_ushort)i + j;
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
bufferSizeLo, gIn, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write garbage into output arrays
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(gOut[j], &pattern, bufferSizeLo);
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||||
|
CL_FALSE, 0, bufferSizeLo,
|
||||||
|
gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||||
|
error, j);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset_pattern4(gOut2[j], &pattern, bufferSizeHi);
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
|
||||||
|
CL_FALSE, 0, bufferSizeHi,
|
||||||
|
gOut2[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
|
||||||
|
error, j);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||||
|
sizeof(pattern), 0, bufferSizeLo, 0,
|
||||||
|
NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer 1 failed!\n");
|
||||||
|
|
||||||
|
error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j], &pattern,
|
||||||
|
sizeof(pattern), 0, bufferSizeHi, 0,
|
||||||
|
NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer 2 failed!\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the kernels
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
// align working group size with the bigger output type
|
||||||
|
size_t vectorSize = sizeValues[j] * sizeof(cl_int);
|
||||||
|
size_t localCount = (bufferSizeHi + vectorSize - 1) / vectorSize;
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||||
|
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error =
|
||||||
|
clSetKernelArg(kernels[j][thread_id], 1,
|
||||||
|
sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 2,
|
||||||
|
sizeof(gInBuffer), &gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||||
|
1, NULL, &localCount, NULL, 0,
|
||||||
|
NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(gQueue)))
|
||||||
|
{
|
||||||
|
vlog_error("clFlush failed\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
cl_half *ref1 = (cl_half *)gOut_Ref;
|
||||||
|
int32_t *ref2 = (int32_t *)gOut_Ref2;
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
ref1[j] = HFF((float)f->func.f_fpI(HTF(pIn[j]), ref2 + j));
|
||||||
|
|
||||||
|
// Read the data back
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
cl_bool blocking =
|
||||||
|
(j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
|
||||||
|
if ((error =
|
||||||
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], blocking, 0,
|
||||||
|
bufferSizeLo, gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("ReadArray failed %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], blocking,
|
||||||
|
0, bufferSizeHi, gOut2[j], 0, NULL,
|
||||||
|
NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("ReadArray2 failed %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting) break;
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
{
|
||||||
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
|
{
|
||||||
|
cl_half *test1 = (cl_half *)(gOut[k]);
|
||||||
|
int32_t *test2 = (int32_t *)(gOut2[k]);
|
||||||
|
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (ref1[j] != test1[j] || ref2[j] != test2[j])
|
||||||
|
{
|
||||||
|
cl_half test = ((cl_half *)test1)[j];
|
||||||
|
int correct2 = INT_MIN;
|
||||||
|
float fp_correct =
|
||||||
|
(float)f->func.f_fpI(HTF(pIn[j]), &correct2);
|
||||||
|
cl_half correct = HFF(fp_correct);
|
||||||
|
float err = correct != test
|
||||||
|
? Ulp_Error_Half(test, fp_correct)
|
||||||
|
: 0.f;
|
||||||
|
cl_long iErr = (int64_t)test2[j] - (int64_t)correct2;
|
||||||
|
int fail = !(fabsf(err) <= half_ulps
|
||||||
|
&& abs_cl_long(iErr) <= maxiError);
|
||||||
|
if (ftz)
|
||||||
|
{
|
||||||
|
// retry per section 6.5.3.2
|
||||||
|
if (IsHalfResultSubnormal(fp_correct, half_ulps))
|
||||||
|
{
|
||||||
|
fail = fail && !(test == 0.0f && iErr == 0);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// retry per section 6.5.3.3
|
||||||
|
if (IsHalfSubnormal(pIn[j]))
|
||||||
|
{
|
||||||
|
int correct5, correct6;
|
||||||
|
double fp_correct3 = f->func.f_fpI(0.0, &correct5);
|
||||||
|
double fp_correct4 = f->func.f_fpI(-0.0, &correct6);
|
||||||
|
|
||||||
|
float err2 = Ulp_Error_Half(test, fp_correct3);
|
||||||
|
float err3 = Ulp_Error_Half(test, fp_correct4);
|
||||||
|
|
||||||
|
cl_long iErr2 =
|
||||||
|
(long long)test2[j] - (long long)correct5;
|
||||||
|
cl_long iErr3 =
|
||||||
|
(long long)test2[j] - (long long)correct6;
|
||||||
|
|
||||||
|
// Did +0 work?
|
||||||
|
if (fabsf(err2) <= half_ulps
|
||||||
|
&& abs_cl_long(iErr2) <= maxiError)
|
||||||
|
{
|
||||||
|
err = err2;
|
||||||
|
iErr = iErr2;
|
||||||
|
fail = 0;
|
||||||
|
}
|
||||||
|
// Did -0 work?
|
||||||
|
else if (fabsf(err3) <= half_ulps
|
||||||
|
&& abs_cl_long(iErr3) <= maxiError)
|
||||||
|
{
|
||||||
|
err = err3;
|
||||||
|
iErr = iErr3;
|
||||||
|
fail = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// retry per section 6.5.3.4
|
||||||
|
if (fail
|
||||||
|
&& (IsHalfResultSubnormal(correct2, half_ulps)
|
||||||
|
|| IsHalfResultSubnormal(fp_correct3,
|
||||||
|
half_ulps)))
|
||||||
|
{
|
||||||
|
fail = fail
|
||||||
|
&& !(test == 0.0f
|
||||||
|
&& (abs_cl_long(iErr2) <= maxiError
|
||||||
|
|| abs_cl_long(iErr3)
|
||||||
|
<= maxiError));
|
||||||
|
if (!fail)
|
||||||
|
{
|
||||||
|
err = 0.0f;
|
||||||
|
iErr = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fabsf(err) > maxError)
|
||||||
|
{
|
||||||
|
maxError = fabsf(err);
|
||||||
|
maxErrorVal = pIn[j];
|
||||||
|
}
|
||||||
|
if (llabs(iErr) > maxError2)
|
||||||
|
{
|
||||||
|
maxError2 = llabs(iErr);
|
||||||
|
maxErrorVal2 = pIn[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
vlog_error("\nERROR: %s%s: {%f, %d} ulp error at %a: "
|
||||||
|
"*{%a, %d} vs. {%a, %d}\n",
|
||||||
|
f->name, sizeNames[k], err, (int)iErr,
|
||||||
|
HTF(pIn[j]), HTF(ref1[j]),
|
||||||
|
((int *)gOut_Ref2)[j], HTF(test), test2[j]);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == (i & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14" PRIu64 " step:%10" PRIu64
|
||||||
|
" bufferSize:%10zu \n",
|
||||||
|
i, step, bufferSizeHi);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
|
||||||
|
vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
|
||||||
|
maxErrorVal, maxErrorVal2);
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
239
test_conformance/math_brute_force/unary_u_half.cpp
Normal file
239
test_conformance/math_brute_force/unary_u_half.cpp
Normal file
@@ -0,0 +1,239 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "function_list.h"
|
||||||
|
#include "test_functions.h"
|
||||||
|
#include "utility.h"
|
||||||
|
#include "reference_math.h"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <cinttypes>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
static cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED,
|
||||||
|
void *p)
|
||||||
|
{
|
||||||
|
BuildKernelInfo &info = *(BuildKernelInfo *)p;
|
||||||
|
auto generator = [](const std::string &kernel_name, const char *builtin,
|
||||||
|
cl_uint vector_size_index) {
|
||||||
|
return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
|
||||||
|
ParameterType::UShort, vector_size_index);
|
||||||
|
};
|
||||||
|
return BuildKernels(info, job_id, generator);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int TestFunc_Half_UShort(const Func *f, MTdata d, bool relaxedMode)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
Programs programs;
|
||||||
|
KernelMatrix kernels;
|
||||||
|
const unsigned thread_id = 0; // Test is currently not multithreaded.
|
||||||
|
float maxError = 0.0f;
|
||||||
|
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
|
||||||
|
float maxErrorVal = 0.0f;
|
||||||
|
uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
|
||||||
|
size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_half),
|
||||||
|
size_t(1ULL << (sizeof(cl_half) * 8)));
|
||||||
|
size_t bufferSize = bufferElements * sizeof(cl_half);
|
||||||
|
logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
|
||||||
|
const char *name = f->name;
|
||||||
|
float half_ulps = f->half_ulps;
|
||||||
|
|
||||||
|
// Init the kernels
|
||||||
|
BuildKernelInfo build_info = { 1, kernels, programs, f->nameInCode };
|
||||||
|
if ((error = ThreadPool_Do(BuildKernel_HalfFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info)))
|
||||||
|
{
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
||||||
|
{
|
||||||
|
// Init input array
|
||||||
|
cl_ushort *p = (cl_ushort *)gIn;
|
||||||
|
for (size_t j = 0; j < bufferElements; j++) p[j] = (uint16_t)i + j;
|
||||||
|
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
bufferSize, gIn, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// write garbage into output arrays
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
uint32_t pattern = 0xacdcacdc;
|
||||||
|
if (gHostFill)
|
||||||
|
{
|
||||||
|
memset_pattern4(gOut[j], &pattern, bufferSize);
|
||||||
|
if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
|
||||||
|
CL_FALSE, 0, bufferSize,
|
||||||
|
gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||||
|
error, j);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
|
||||||
|
sizeof(pattern), 0, bufferSize, 0,
|
||||||
|
NULL, NULL);
|
||||||
|
test_error(error, "clEnqueueFillBuffer failed!\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the kernels
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
size_t vectorSize = sizeValues[j] * sizeof(cl_half);
|
||||||
|
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 0,
|
||||||
|
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j][thread_id], 1,
|
||||||
|
sizeof(gInBuffer), &gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
|
||||||
|
1, NULL, &localCount, NULL, 0,
|
||||||
|
NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get that moving
|
||||||
|
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
||||||
|
|
||||||
|
// Calculate the correctly rounded reference result
|
||||||
|
cl_half *r = (cl_half *)gOut_Ref;
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
{
|
||||||
|
if (!strcmp(name, "nan"))
|
||||||
|
r[j] = reference_nanh(p[j]);
|
||||||
|
else
|
||||||
|
r[j] = HFF(f->func.f_u(p[j]));
|
||||||
|
}
|
||||||
|
// Read the data back
|
||||||
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
|
{
|
||||||
|
if ((error =
|
||||||
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
|
bufferSize, gOut[j], 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("ReadArray failed %d\n", error);
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gSkipCorrectnessTesting) break;
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
cl_ushort *t = (cl_ushort *)gOut_Ref;
|
||||||
|
for (size_t j = 0; j < bufferElements; j++)
|
||||||
|
{
|
||||||
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
|
{
|
||||||
|
cl_ushort *q = (cl_ushort *)(gOut[k]);
|
||||||
|
|
||||||
|
// If we aren't getting the correctly rounded result
|
||||||
|
if (t[j] != q[j])
|
||||||
|
{
|
||||||
|
double test = cl_half_to_float(q[j]);
|
||||||
|
double correct;
|
||||||
|
if (!strcmp(name, "nan"))
|
||||||
|
correct = cl_half_to_float(reference_nanh(p[j]));
|
||||||
|
else
|
||||||
|
correct = f->func.f_u(p[j]);
|
||||||
|
|
||||||
|
float err = Ulp_Error_Half(q[j], correct);
|
||||||
|
int fail = !(fabsf(err) <= half_ulps);
|
||||||
|
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
if (ftz)
|
||||||
|
{
|
||||||
|
// retry per section 6.5.3.2
|
||||||
|
if (IsHalfResultSubnormal(correct, half_ulps))
|
||||||
|
{
|
||||||
|
fail = fail && (test != 0.0f);
|
||||||
|
if (!fail) err = 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fabsf(err) > maxError)
|
||||||
|
{
|
||||||
|
maxError = fabsf(err);
|
||||||
|
maxErrorVal = p[j];
|
||||||
|
}
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
vlog_error(
|
||||||
|
"\n%s%s: %f ulp error at 0x%04x \nExpected: %a "
|
||||||
|
"(0x%04x) \nActual: %a (0x%04x)\n",
|
||||||
|
f->name, sizeNames[k], err, p[j],
|
||||||
|
cl_half_to_float(r[j]), r[j], test, q[j]);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == (i & 0x0fffffff))
|
||||||
|
{
|
||||||
|
if (gVerboseBruteForce)
|
||||||
|
{
|
||||||
|
vlog("base:%14" PRIu64 " step:%10" PRIu64
|
||||||
|
" bufferSize:%10zd \n",
|
||||||
|
i, step, bufferSize);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting)
|
||||||
|
{
|
||||||
|
if (gWimpyMode)
|
||||||
|
vlog("Wimp pass");
|
||||||
|
else
|
||||||
|
vlog("passed");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!gSkipCorrectnessTesting) vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||||
|
vlog("\n");
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -22,6 +22,7 @@
|
|||||||
#include "harness/testHarness.h"
|
#include "harness/testHarness.h"
|
||||||
#include "harness/ThreadPool.h"
|
#include "harness/ThreadPool.h"
|
||||||
#include "harness/conversions.h"
|
#include "harness/conversions.h"
|
||||||
|
#include "CL/cl_half.h"
|
||||||
|
|
||||||
#define BUFFER_SIZE (1024 * 1024 * 2)
|
#define BUFFER_SIZE (1024 * 1024 * 2)
|
||||||
#define EMBEDDED_REDUCTION_FACTOR (64)
|
#define EMBEDDED_REDUCTION_FACTOR (64)
|
||||||
@@ -61,10 +62,21 @@ extern int gFastRelaxedDerived;
|
|||||||
extern int gWimpyMode;
|
extern int gWimpyMode;
|
||||||
extern int gHostFill;
|
extern int gHostFill;
|
||||||
extern int gIsInRTZMode;
|
extern int gIsInRTZMode;
|
||||||
|
extern int gHasHalf;
|
||||||
|
extern int gInfNanSupport;
|
||||||
|
extern int gIsEmbedded;
|
||||||
extern int gVerboseBruteForce;
|
extern int gVerboseBruteForce;
|
||||||
extern uint32_t gMaxVectorSizeIndex;
|
extern uint32_t gMaxVectorSizeIndex;
|
||||||
extern uint32_t gMinVectorSizeIndex;
|
extern uint32_t gMinVectorSizeIndex;
|
||||||
extern cl_device_fp_config gFloatCapabilities;
|
extern cl_device_fp_config gFloatCapabilities;
|
||||||
|
extern cl_device_fp_config gHalfCapabilities;
|
||||||
|
extern RoundingMode gFloatToHalfRoundingMode;
|
||||||
|
|
||||||
|
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||||
|
|
||||||
|
#define HFF(num) cl_half_from_float(num, gHalfRoundingMode)
|
||||||
|
#define HFD(num) cl_half_from_double(num, gHalfRoundingMode)
|
||||||
|
#define HTF(num) cl_half_to_float(num)
|
||||||
|
|
||||||
#define LOWER_IS_BETTER 0
|
#define LOWER_IS_BETTER 0
|
||||||
#define HIGHER_IS_BETTER 1
|
#define HIGHER_IS_BETTER 1
|
||||||
@@ -115,6 +127,12 @@ inline int IsFloatResultSubnormal(double x, float ulps)
|
|||||||
return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
|
return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline int IsHalfResultSubnormal(float x, float ulps)
|
||||||
|
{
|
||||||
|
x = fabs(x) - MAKE_HEX_FLOAT(0x1.0p-24, 0x1, -24) * ulps;
|
||||||
|
return x < MAKE_HEX_FLOAT(0x1.0p-14, 0x1, -14);
|
||||||
|
}
|
||||||
|
|
||||||
inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
|
inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
|
||||||
{
|
{
|
||||||
x = x - abs_err;
|
x = x - abs_err;
|
||||||
@@ -157,6 +175,26 @@ inline int IsFloatNaN(double x)
|
|||||||
return ((u.u & 0x7fffffffU) > 0x7F800000U);
|
return ((u.u & 0x7fffffffU) > 0x7F800000U);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool IsHalfNaN(const cl_half v)
|
||||||
|
{
|
||||||
|
// Extract FP16 exponent and mantissa
|
||||||
|
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||||
|
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
||||||
|
|
||||||
|
// NaN test
|
||||||
|
return (h_exp == 0x1F && h_mant != 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool IsHalfInfinity(const cl_half v)
|
||||||
|
{
|
||||||
|
// Extract FP16 exponent and mantissa
|
||||||
|
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||||
|
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
||||||
|
|
||||||
|
// Inf test
|
||||||
|
return (h_exp == 0x1F && h_mant == 0);
|
||||||
|
}
|
||||||
|
|
||||||
cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
|
cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
|
||||||
|
|
||||||
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
||||||
|
|||||||
Reference in New Issue
Block a user