Add fp16 testing to conversions and bruteforce (#1975)

Merge the `fp16-staging` branch into `main`, adding fp16 (`half`)
testing to the conversions and math bruteforce tests.

---------

Signed-off-by: Ahmed Hesham <ahmed.hesham@arm.com>
Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
Signed-off-by: Guo, Yilong <yilong.guo@intel.com>
Signed-off-by: John Kesapides <john.kesapides@arm.com>
Co-authored-by: Marcin Hajder <marcin.hajder@gmail.com>
Co-authored-by: Ewan Crawford <ewan@codeplay.com>
Co-authored-by: Wawiorko, Grzegorz <grzegorz.wawiorko@intel.com>
Co-authored-by: Sreelakshmi Haridas Maruthur <sharidas@quicinc.com>
Co-authored-by: Harald van Dijk <harald@gigawatt.nl>
Co-authored-by: Ben Ashbaugh <ben.ashbaugh@intel.com>
Co-authored-by: Haonan Yang <haonan.yang@intel.com>
Co-authored-by: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com>
Co-authored-by: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com>
Co-authored-by: Wenwan Xing <wenwan.xing@intel.com>
Co-authored-by: Yilong Guo <yilong.guo@intel.com>
Co-authored-by: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com>
Co-authored-by: joshqti <127994991+joshqti@users.noreply.github.com>
Co-authored-by: Pekka Jääskeläinen <pekka.jaaskelainen@tuni.fi>
Co-authored-by: imilenkovic00 <155085410+imilenkovic00@users.noreply.github.com>
Co-authored-by: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com>
Co-authored-by: Aharon Abramson <aharon.abramson@mobileye.com>
This commit is contained in:
Sven van Haastregt
2024-06-18 18:43:11 +02:00
committed by GitHub
parent b3c89ebde0
commit b6941b6c61
30 changed files with 7149 additions and 350 deletions

View File

@@ -1,5 +1,5 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
// Copyright (c) 2017-2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -48,6 +48,7 @@
#include <vector>
#include <type_traits>
#include <cmath>
#include "basic_test_conversions.h"
@@ -86,9 +87,13 @@ int gWimpyReductionFactor = 128;
int gSkipTesting = 0;
// When non-zero, programs are built with -cl-denorms-are-zero and, for
// conversions that involve float, PrepareReference records per element
// whether a zero result is accepted in addition to the correctly rounded one.
int gForceFTZ = 0;
// When non-zero, DoTest runs a test requested with the default rounding mode
// as round-toward-zero.
// NOTE(review): the guarding condition is outside this view; presumably this
// applies to float outputs -- confirm.
int gIsRTZ = 0;
// Half-precision counterpart of gForceFTZ: enables the allow-zero table for
// conversions that involve half and also selects -cl-denorms-are-zero.
int gForceHalfFTZ = 0;
// Half-precision counterpart of gIsRTZ: when non-zero, half outputs requested
// with the default rounding mode are tested as round-toward-zero.
int gIsHalfRTZ = 0;
uint32_t gSimdSize = 1;
int gHasDouble = 0;
int gTestDouble = 1;
// gTestHalfs == 0 makes the test loops skip every conversion to or from half.
// If gHasHalfs is non-zero at the same time, each skipped conversion is
// reported as a failure ("cl_khr_fp16 enabled, but half testing turned off").
int gHasHalfs = 0;
int gTestHalfs = 1;
// Parallel tables indexed by vector-size index: the suffix of the vector type
// name and the number of elements. Indices 0 and 1 are both scalar.
// NOTE(review): index 0 appears to select the implicit-conversion variant
// (see the `0 == vectorSize` special cases) -- confirm.
const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
// First vector-size index that DoTest iterates over.
int gMinVectorSize = 0;
@@ -100,6 +105,8 @@ int argCount = 0;
double SubtractTime(uint64_t endTime, uint64_t startTime);
// Host-side rounding modes for half-precision reference conversions.
// Both start as round-to-nearest-even. ConversionsTest::SetUp overwrites both
// from the device's CL_DEVICE_HALF_FP_CONFIG, and PrepareReference rewrites
// DataInitInfo::halfRoundingMode for each half-output test according to the
// requested rounding mode (falling back to defaultHalfRoundingMode).
cl_half_rounding_mode DataInitInfo::halfRoundingMode = CL_HALF_RTE;
cl_half_rounding_mode ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE;
// clang-format off
// For readability's sake, keep this section unformatted.
@@ -256,8 +263,30 @@ std::vector<double> DataInitInfo::specialValuesDouble = {
MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30),
MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
};
// clang-format on
// A table of more difficult cases to get right.
// Each entry is the raw 16-bit pattern of a half-precision value
// (1 sign bit, 5 exponent bits, 10 mantissa bits).
std::vector<cl_half> DataInitInfo::specialValuesHalf = {
0xffff, /* NaN (sign set, all exponent and mantissa bits set) */
0x0000, /* +0 */
0x0001, /* Smallest positive denormal */
0x7c00, /*INFINITY*/
0xfc00, /*-INFINITY*/
0x8000, /*-0*/
0x7bff, /*HALF_MAX*/
0x0400, /*HALF_MIN*/
0x03ff, /* Largest denormal */
0x3c00, /* 1 */
0xbc00, /* -1 */
0x3555, /*nearest value to 1/3*/
0x3bff, /*largest number less than one*/
0xc000, /* -2 */
0xfbff, /* -HALF_MAX */
0x8400, /* -HALF_MIN */
0x4248, /* M_PI_H */
0xc248, /* -M_PI_H */
0xbbff, /* Negative value closest to, but greater than, -1 */
};
// clang-format on
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
// (that's x87 default state). This causes problems with the tests that
@@ -282,15 +311,32 @@ static inline void Force64BitFPUPrecision(void)
#endif
}
template <typename InType, typename OutType>
int CalcRefValsPat<InType, OutType>::check_result(void *test, uint32_t count,
int vectorSize)
template <typename InType, typename OutType, bool InFP, bool OutFP>
int CalcRefValsPat<InType, OutType, InFP, OutFP>::check_result(void *test,
uint32_t count,
int vectorSize)
{
const cl_uchar *a = (const cl_uchar *)gAllowZ;
if (std::is_integral<OutType>::value)
{ // char/uchar/short/ushort/int/uint/long/ulong
if (is_half<OutType, OutFP>())
{
const cl_half *t = (const cl_half *)test;
const cl_half *c = (const cl_half *)gRef;
for (uint32_t i = 0; i < count; i++)
if (t[i] != c[i] &&
// Allow nan's to be binary different
!((t[i] & 0x7fff) > 0x7C00 && (c[i] & 0x7fff) > 0x7C00)
&& !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x8000)))
{
vlog(
"\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
vectorSize, i, HTF(c[i]), HTF(t[i]));
return i + 1;
}
}
else if (std::is_integral<OutType>::value)
{ // char/uchar/short/ushort/half/int/uint/long/ulong
const OutType *t = (const OutType *)test;
const OutType *c = (const OutType *)gRef;
for (uint32_t i = 0; i < count; i++)
@@ -388,6 +434,20 @@ cl_int CustomConversionsTest::Run()
continue;
}
// skip half if we don't have it
if (!gTestHalfs && (inType == khalf || outType == khalf))
{
if (gHasHalfs)
{
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
gTypeNames[outType], gSaturationNames[sat],
gRoundingModeNames[round], gTypeNames[inType]);
vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
"off.\n");
}
continue;
}
// skip longs on embedded
if (!gHasLong
&& (inType == klong || outType == klong || inType == kulong
@@ -427,8 +487,8 @@ ConversionsTest::ConversionsTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: context(context), device(device), queue(queue), num_elements(0),
typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0),
cl_uint(0), cl_int(0), cl_float(0), cl_double(0),
cl_ulong(0), cl_long(0) })
cl_uint(0), cl_int(0), cl_half(0), cl_float(0),
cl_double(0), cl_ulong(0), cl_long(0) })
{}
@@ -445,11 +505,31 @@ cl_int ConversionsTest::Run()
// Prepare the fixture for a run.
//
// Stores the requested element count and, when the device reports the
// cl_khr_fp16 extension, selects the rounding mode used by the host-side half
// conversions from the value get_default_rounding_mode() returns for
// CL_DEVICE_HALF_FP_CONFIG: round-to-nearest-even when that bit is set,
// otherwise round-toward-zero.
//
// Returns CL_SUCCESS on success, or TEST_FAIL when neither rounding bit is
// reported for a device that advertises cl_khr_fp16.
cl_int ConversionsTest::SetUp(int elements)
{
    num_elements = elements;

    if (is_extension_available(device, "cl_khr_fp16"))
    {
        const cl_device_fp_config fpConfigHalf =
            get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);

        if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0)
        {
            DataInitInfo::halfRoundingMode = CL_HALF_RTE;
            ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE;
        }
        else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0)
        {
            DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
            ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTZ;
        }
        else
        {
            // Terminate the message with a newline so that it does not run
            // into whatever is logged next.
            log_error("Error while acquiring half rounding mode\n");
            return TEST_FAIL;
        }
    }

    return CL_SUCCESS;
}
template <typename InType, typename OutType>
template <typename InType, typename OutType, bool InFP, bool OutFP>
void ConversionsTest::TestTypesConversion(const Type &inType,
const Type &outType, int &testNumber,
int startMinVectorSize)
@@ -470,7 +550,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
sat = (SaturationMode)(sat + 1))
{
// skip illegal saturated conversions to float type
if (kSaturated == sat && (outType == kfloat || outType == kdouble))
if (kSaturated == sat
&& (outType == kfloat || outType == kdouble || outType == khalf))
{
continue;
}
@@ -507,6 +588,20 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
continue;
}
// skip half if we don't have it
if (!gTestHalfs && (inType == khalf || outType == khalf))
{
if (gHasHalfs)
{
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
gTypeNames[outType], gSaturationNames[sat],
gRoundingModeNames[round], gTypeNames[inType]);
vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
"off.\n");
}
continue;
}
// Skip the implicit converts if the rounding mode is
// not default or test is saturated
if (0 == startMinVectorSize)
@@ -517,7 +612,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
gMinVectorSize = 0;
}
if ((error = DoTest<InType, OutType>(outType, inType, sat, round)))
if ((error = DoTest<InType, OutType, InFP, OutFP>(outType, inType,
sat, round)))
{
vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
"FAILED ** \n",
@@ -529,8 +625,7 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
}
}
template <typename InType, typename OutType>
template <typename InType, typename OutType, bool InFP, bool OutFP>
int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
RoundingMode round)
{
@@ -541,7 +636,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
cl_uint threads = GetThreadCount();
DataInitInfo info = { 0, 0, outType, inType, sat, round, threads };
DataInfoSpec<InType, OutType> init_info(info);
DataInfoSpec<InType, OutType, InFP, OutFP> init_info(info);
WriteInputBufferInfo writeInputBufferInfo;
int vectorSize;
int error = 0;
@@ -564,7 +659,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
{
writeInputBufferInfo.calcInfo[vectorSize].reset(
new CalcRefValsPat<InType, OutType>());
new CalcRefValsPat<InType, OutType, InFP, OutFP>());
writeInputBufferInfo.calcInfo[vectorSize]->program =
conv_test::MakeProgram(
outType, inType, sat, round, vectorSize,
@@ -597,6 +692,11 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
if (round == kDefaultRoundingMode && gIsRTZ)
init_info.round = round = kRoundTowardZero;
}
else if (std::is_same<OutType, cl_half>::value && OutFP)
{
if (round == kDefaultRoundingMode && gIsHalfRTZ)
init_info.round = round = kRoundTowardZero;
}
// Figure out how many elements are in a work block
// we handle 64-bit types a bit differently.
@@ -764,6 +864,10 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
vlog("Input value: 0x%8.8x ",
((unsigned int *)gIn)[error - 1]);
break;
case khalf:
vlog("Input value: %a ",
HTF(((cl_half *)gIn)[error - 1]));
break;
case kfloat:
vlog("Input value: %a ", ((float *)gIn)[error - 1]);
break;
@@ -901,16 +1005,6 @@ double SubtractTime(uint64_t endTime, uint64_t startTime)
}
#endif
////////////////////////////////////////////////////////////////////////////////
static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count)
{
cl_uint i;
for (i = 0; i < count; ++i)
allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0);
}
void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &ptr);
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
@@ -951,6 +1045,112 @@ void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
// destroyed automatically soon after we exit.
}
template <typename T> static bool isnan_fp(const T &v)
{
if (std::is_same<T, cl_half>::value)
{
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = ((cl_half)v) & 0x3FF;
return (h_exp == 0x1F && h_mant != 0);
}
else
{
#if !defined(_WIN32)
return std::isnan(v);
#else
return _isnan(v);
#endif
}
}
template <typename InType>
void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType)
{
InType *inp = (InType *)gIn;
for (auto j = 0; j < count; j++)
{
if (isnan_fp<InType>(inp[j]))
memset((char *)mapped + j * gTypeSizes[outType], 0,
gTypeSizes[outType]);
}
}
template <typename InType, typename OutType>
void FixNanToFltConversions(InType *inp, OutType *outp, cl_uint count)
{
if (std::is_same<OutType, cl_half>::value)
{
for (auto j = 0; j < count; j++)
if (isnan_fp(inp[j]) && isnan_fp(outp[j]))
outp[j] = 0x7e00; // HALF_NAN
}
else
{
for (auto j = 0; j < count; j++)
if (isnan_fp(inp[j]) && isnan_fp(outp[j])) outp[j] = NAN;
}
}
void FixNanConversions(Type outType, Type inType, void *d, cl_uint count)
{
if (outType != kfloat && outType != kdouble && outType != khalf)
{
if (inType == kfloat)
ZeroNanToIntCases<float>(count, d, outType);
else if (inType == kdouble)
ZeroNanToIntCases<double>(count, d, outType);
else if (inType == khalf)
ZeroNanToIntCases<cl_half>(count, d, outType);
}
else if (inType == kfloat || inType == kdouble || inType == khalf)
{
// outtype and intype is float or double or half. NaN conversions for
// float/double/half could be any NaN
if (inType == kfloat)
{
float *inp = (float *)gIn;
if (outType == kdouble)
{
double *outp = (double *)d;
FixNanToFltConversions(inp, outp, count);
}
else if (outType == khalf)
{
cl_half *outp = (cl_half *)d;
FixNanToFltConversions(inp, outp, count);
}
}
else if (inType == kdouble)
{
double *inp = (double *)gIn;
if (outType == kfloat)
{
float *outp = (float *)d;
FixNanToFltConversions(inp, outp, count);
}
else if (outType == khalf)
{
cl_half *outp = (cl_half *)d;
FixNanToFltConversions(inp, outp, count);
}
}
else if (inType == khalf)
{
cl_half *inp = (cl_half *)gIn;
if (outType == kfloat)
{
float *outp = (float *)d;
FixNanToFltConversions(inp, outp, count);
}
else if (outType == kdouble)
{
double *outp = (double *)d;
FixNanToFltConversions(inp, outp, count);
}
}
}
}
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
void *data)
@@ -963,7 +1163,6 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
Type outType =
info->parent->outType; // the data type of the conversion result
Type inType = info->parent->inType; // the data type of the conversion input
size_t j;
cl_int error;
cl_event doneBarrier = info->parent->doneBarrier;
@@ -985,51 +1184,7 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
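// Patch up NaN conversions before comparing: a NaN converted to an integer
// may yield any integer (those results are zeroed), and a NaN converted to
// another floating-point type may yield any NaN (those are canonicalized).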
if (outType != kfloat && outType != kdouble)
{
if (inType == kfloat)
{
float *inp = (float *)gIn;
for (j = 0; j < count; j++)
{
if (isnan(inp[j]))
memset((char *)mapped + j * gTypeSizes[outType], 0,
gTypeSizes[outType]);
}
}
if (inType == kdouble)
{
double *inp = (double *)gIn;
for (j = 0; j < count; j++)
{
if (isnan(inp[j]))
memset((char *)mapped + j * gTypeSizes[outType], 0,
gTypeSizes[outType]);
}
}
}
else if (inType == kfloat || inType == kdouble)
{ // outtype and intype is float or double. NaN conversions for float <->
// double can be any NaN
if (inType == kfloat && outType == kdouble)
{
float *inp = (float *)gIn;
double *outp = (double *)mapped;
for (j = 0; j < count; j++)
{
if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
}
}
if (inType == kdouble && outType == kfloat)
{
double *inp = (double *)gIn;
float *outp = (float *)mapped;
for (j = 0; j < count; j++)
{
if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
}
}
}
FixNanConversions(outType, inType, mapped, count);
if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
info->result =
@@ -1077,12 +1232,8 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
// CalcReferenceValuesComplete exit.
}
//
namespace conv_test {
////////////////////////////////////////////////////////////////////////////////
cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
{
DataInitBase *info = (DataInitBase *)p;
@@ -1092,8 +1243,6 @@ cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
return CL_SUCCESS;
}
////////////////////////////////////////////////////////////////////////////////
cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
{
DataInitBase *info = (DataInitBase *)p;
@@ -1102,7 +1251,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
Type inType = info->inType;
Type outType = info->outType;
RoundingMode round = info->round;
size_t j;
Force64BitFPUPrecision();
@@ -1110,7 +1258,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
void *a = (cl_uchar *)gAllowZ + job_id * count;
void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
if (outType != inType)
{
// create the reference while we wait
@@ -1144,7 +1291,33 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
qcom_sat = info->sat;
#endif
RoundingMode oldRound = set_round(round, outType);
RoundingMode oldRound;
if (outType == khalf)
{
oldRound = set_round(kRoundToNearestEven, kfloat);
switch (round)
{
default:
case kDefaultRoundingMode:
DataInitInfo::halfRoundingMode =
ConversionsTest::defaultHalfRoundingMode;
break;
case kRoundToNearestEven:
DataInitInfo::halfRoundingMode = CL_HALF_RTE;
break;
case kRoundUp:
DataInitInfo::halfRoundingMode = CL_HALF_RTP;
break;
case kRoundDown:
DataInitInfo::halfRoundingMode = CL_HALF_RTN;
break;
case kRoundTowardZero:
DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
break;
}
}
else
oldRound = set_round(round, outType);
if (info->sat)
info->conv_array_sat(d, s, count);
@@ -1156,10 +1329,13 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
// Decide if we allow a zero result in addition to the correctly rounded
// one
memset(a, 0, count);
if (gForceFTZ)
if (gForceFTZ && (inType == kfloat || outType == kfloat))
{
if (inType == kfloat || outType == kfloat)
setAllowZ((uint8_t *)a, (uint32_t *)s, count);
info->set_allow_zero_array((uint8_t *)a, d, s, count);
}
if (gForceHalfFTZ && (inType == khalf || outType == khalf))
{
info->set_allow_zero_array((uint8_t *)a, d, s, count);
}
}
else
@@ -1170,55 +1346,11 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
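// Patch up NaN conversions in the reference: a NaN converted to an integer
// may yield any integer (those results are zeroed), and a NaN converted to
// another floating-point type may yield any NaN (those are canonicalized).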
if (info->outType != kfloat && info->outType != kdouble)
{
if (inType == kfloat)
{
float *inp = (float *)s;
for (j = 0; j < count; j++)
{
if (isnan(inp[j]))
memset((char *)d + j * gTypeSizes[outType], 0,
gTypeSizes[outType]);
}
}
if (inType == kdouble)
{
double *inp = (double *)s;
for (j = 0; j < count; j++)
{
if (isnan(inp[j]))
memset((char *)d + j * gTypeSizes[outType], 0,
gTypeSizes[outType]);
}
}
}
else if (inType == kfloat || inType == kdouble)
{ // outtype and intype is float or double. NaN conversions for float <->
// double can be any NaN
if (inType == kfloat && outType == kdouble)
{
float *inp = (float *)s;
for (j = 0; j < count; j++)
{
if (isnan(inp[j])) ((double *)d)[j] = NAN;
}
}
if (inType == kdouble && outType == kfloat)
{
double *inp = (double *)s;
for (j = 0; j < count; j++)
{
if (isnan(inp[j])) ((float *)d)[j] = NAN;
}
}
}
FixNanConversions(outType, inType, d, count);
return CL_SUCCESS;
}
////////////////////////////////////////////////////////////////////////////////
uint64_t GetTime(void)
{
#if defined(__APPLE__)
@@ -1233,8 +1365,6 @@ uint64_t GetTime(void)
#endif
}
////////////////////////////////////////////////////////////////////////////////
// Note: not called reentrantly
void WriteInputBufferComplete(void *data)
{
@@ -1295,8 +1425,6 @@ void WriteInputBufferComplete(void *data)
// automatically soon after we exit.
}
////////////////////////////////////////////////////////////////////////////////
cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
RoundingMode round, int vectorSize, cl_kernel *outKernel)
{
@@ -1308,6 +1436,9 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
if (outType == kdouble || inType == kdouble)
source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
if (outType == khalf || inType == khalf)
source << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
// Create the program. This is a bit complicated because we are trying to
// avoid byte and short stores.
if (0 == vectorSize)
@@ -1408,7 +1539,7 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
*outKernel = NULL;
const char *flags = NULL;
if (gForceFTZ) flags = "-cl-denorms-are-zero";
if (gForceFTZ || gForceHalfFTZ) flags = "-cl-denorms-are-zero";
// build it
std::string sourceString = source.str();