mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Add fp16 testing to conversions and bruteforce (#1975)
Merge the `fp16-staging` branch into `main`, adding fp16 (`half`) testing to the conversions and math bruteforce tests. --------- Signed-off-by: Ahmed Hesham <ahmed.hesham@arm.com> Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com> Signed-off-by: Guo, Yilong <yilong.guo@intel.com> Signed-off-by: John Kesapides <john.kesapides@arm.com> Co-authored-by: Marcin Hajder <marcin.hajder@gmail.com> Co-authored-by: Ewan Crawford <ewan@codeplay.com> Co-authored-by: Wawiorko, Grzegorz <grzegorz.wawiorko@intel.com> Co-authored-by: Sreelakshmi Haridas Maruthur <sharidas@quicinc.com> Co-authored-by: Harald van Dijk <harald@gigawatt.nl> Co-authored-by: Ben Ashbaugh <ben.ashbaugh@intel.com> Co-authored-by: Haonan Yang <haonan.yang@intel.com> Co-authored-by: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com> Co-authored-by: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Co-authored-by: Wenwan Xing <wenwan.xing@intel.com> Co-authored-by: Yilong Guo <yilong.guo@intel.com> Co-authored-by: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Co-authored-by: joshqti <127994991+joshqti@users.noreply.github.com> Co-authored-by: Pekka Jääskeläinen <pekka.jaaskelainen@tuni.fi> Co-authored-by: imilenkovic00 <155085410+imilenkovic00@users.noreply.github.com> Co-authored-by: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Co-authored-by: Aharon Abramson <aharon.abramson@mobileye.com>
This commit is contained in:
committed by
GitHub
parent
b3c89ebde0
commit
b6941b6c61
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
// Copyright (c) 2017-2024 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@@ -48,6 +48,7 @@
|
||||
|
||||
#include <vector>
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
|
||||
#include "basic_test_conversions.h"
|
||||
|
||||
@@ -86,9 +87,13 @@ int gWimpyReductionFactor = 128;
|
||||
int gSkipTesting = 0;
|
||||
int gForceFTZ = 0;
|
||||
int gIsRTZ = 0;
|
||||
int gForceHalfFTZ = 0;
|
||||
int gIsHalfRTZ = 0;
|
||||
uint32_t gSimdSize = 1;
|
||||
int gHasDouble = 0;
|
||||
int gTestDouble = 1;
|
||||
int gHasHalfs = 0;
|
||||
int gTestHalfs = 1;
|
||||
const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
|
||||
int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
|
||||
int gMinVectorSize = 0;
|
||||
@@ -100,6 +105,8 @@ int argCount = 0;
|
||||
|
||||
double SubtractTime(uint64_t endTime, uint64_t startTime);
|
||||
|
||||
// Half rounding mode currently in effect for data initialisation. Starts as
// round-to-nearest-even; it is overwritten elsewhere in this file (in
// ConversionsTest::SetUp and, for half outputs, in PrepareReference).
cl_half_rounding_mode DataInitInfo::halfRoundingMode = CL_HALF_RTE;
|
||||
// Half rounding mode selected when a test asks for the "default" rounding
// mode. Starts as round-to-nearest-even and is updated in
// ConversionsTest::SetUp from the device's half FP configuration.
cl_half_rounding_mode ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE;
|
||||
|
||||
// clang-format off
|
||||
// for readability's sake, keep this section unformatted
|
||||
@@ -256,8 +263,30 @@ std::vector<double> DataInitInfo::specialValuesDouble = {
|
||||
MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30),
|
||||
MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
// Raw IEEE 754 binary16 bit patterns (1 sign bit, 5 exponent bits, 10
// mantissa bits) for the half-precision special cases.
std::vector<cl_half> DataInitInfo::specialValuesHalf = {
|
||||
0xffff, /* NaN: sign set, exponent and mantissa all ones */
|
||||
0x0000, /* +0 */
|
||||
0x0001, /* Smallest positive denormal */
|
||||
0x7c00, /*INFINITY*/
|
||||
0xfc00, /*-INFINITY*/
|
||||
0x8000, /*-0*/
|
||||
0x7bff, /*HALF_MAX*/
|
||||
0x0400, /*HALF_MIN (smallest positive normal)*/
|
||||
0x03ff, /* Largest denormal */
|
||||
0x3c00, /* 1 */
|
||||
0xbc00, /* -1 */
|
||||
0x3555, /*nearest value to 1/3*/
|
||||
0x3bff, /*largest number less than one*/
|
||||
0xc000, /* -2 */
|
||||
0xfbff, /* -HALF_MAX */
|
||||
0x8400, /* -HALF_MIN */
|
||||
0x4248, /* M_PI_H */
|
||||
0xc248, /* -M_PI_H */
|
||||
0xbbff, /* Negative of the largest number less than one */
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
||||
// (that's x87 default state). This causes problems with the tests that
|
||||
@@ -282,15 +311,32 @@ static inline void Force64BitFPUPrecision(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
int CalcRefValsPat<InType, OutType>::check_result(void *test, uint32_t count,
|
||||
int vectorSize)
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
int CalcRefValsPat<InType, OutType, InFP, OutFP>::check_result(void *test,
|
||||
uint32_t count,
|
||||
int vectorSize)
|
||||
{
|
||||
const cl_uchar *a = (const cl_uchar *)gAllowZ;
|
||||
|
||||
if (std::is_integral<OutType>::value)
|
||||
{ // char/uchar/short/ushort/int/uint/long/ulong
|
||||
if (is_half<OutType, OutFP>())
|
||||
{
|
||||
const cl_half *t = (const cl_half *)test;
|
||||
const cl_half *c = (const cl_half *)gRef;
|
||||
|
||||
for (uint32_t i = 0; i < count; i++)
|
||||
if (t[i] != c[i] &&
|
||||
// Allow nan's to be binary different
|
||||
!((t[i] & 0x7fff) > 0x7C00 && (c[i] & 0x7fff) > 0x7C00)
|
||||
&& !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x8000)))
|
||||
{
|
||||
vlog(
|
||||
"\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
|
||||
vectorSize, i, HTF(c[i]), HTF(t[i]));
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
else if (std::is_integral<OutType>::value)
|
||||
{ // char/uchar/short/ushort/half/int/uint/long/ulong
|
||||
const OutType *t = (const OutType *)test;
|
||||
const OutType *c = (const OutType *)gRef;
|
||||
for (uint32_t i = 0; i < count; i++)
|
||||
@@ -388,6 +434,20 @@ cl_int CustomConversionsTest::Run()
|
||||
continue;
|
||||
}
|
||||
|
||||
// skip half if we don't have it
|
||||
if (!gTestHalfs && (inType == khalf || outType == khalf))
|
||||
{
|
||||
if (gHasHalfs)
|
||||
{
|
||||
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
|
||||
gTypeNames[outType], gSaturationNames[sat],
|
||||
gRoundingModeNames[round], gTypeNames[inType]);
|
||||
vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
|
||||
"off.\n");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// skip longs on embedded
|
||||
if (!gHasLong
|
||||
&& (inType == klong || outType == klong || inType == kulong
|
||||
@@ -427,8 +487,8 @@ ConversionsTest::ConversionsTest(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: context(context), device(device), queue(queue), num_elements(0),
|
||||
typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0),
|
||||
cl_uint(0), cl_int(0), cl_float(0), cl_double(0),
|
||||
cl_ulong(0), cl_long(0) })
|
||||
cl_uint(0), cl_int(0), cl_half(0), cl_float(0),
|
||||
cl_double(0), cl_ulong(0), cl_long(0) })
|
||||
{}
|
||||
|
||||
|
||||
@@ -445,11 +505,31 @@ cl_int ConversionsTest::Run()
|
||||
// Records the element count for the test run and, when the device reports
// the cl_khr_fp16 extension, selects the half rounding mode used by the test.
//
// elements: stored in num_elements.
//
// Returns CL_SUCCESS on success, or TEST_FAIL when the value obtained for
// CL_DEVICE_HALF_FP_CONFIG advertises neither round-to-nearest nor
// round-to-zero.
cl_int ConversionsTest::SetUp(int elements)
{
    num_elements = elements;

    if (!is_extension_available(device, "cl_khr_fp16")) return CL_SUCCESS;

    const cl_device_fp_config fpConfigHalf =
        get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);

    // Round-to-nearest takes precedence when both bits are reported.
    cl_half_rounding_mode mode;
    if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0)
    {
        mode = CL_HALF_RTE;
    }
    else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0)
    {
        mode = CL_HALF_RTZ;
    }
    else
    {
        // Terminate the message with a newline so it does not run into the
        // next line of log output.
        log_error("Error while acquiring half rounding mode\n");
        return TEST_FAIL;
    }

    // Both the data-initialisation mode and the test-wide default follow the
    // device's rounding mode.
    DataInitInfo::halfRoundingMode = mode;
    ConversionsTest::defaultHalfRoundingMode = mode;

    return CL_SUCCESS;
}
|
||||
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||
const Type &outType, int &testNumber,
|
||||
int startMinVectorSize)
|
||||
@@ -470,7 +550,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||
sat = (SaturationMode)(sat + 1))
|
||||
{
|
||||
// skip illegal saturated conversions to float type
|
||||
if (kSaturated == sat && (outType == kfloat || outType == kdouble))
|
||||
if (kSaturated == sat
|
||||
&& (outType == kfloat || outType == kdouble || outType == khalf))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@@ -507,6 +588,20 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||
continue;
|
||||
}
|
||||
|
||||
// skip half if we don't have it
|
||||
if (!gTestHalfs && (inType == khalf || outType == khalf))
|
||||
{
|
||||
if (gHasHalfs)
|
||||
{
|
||||
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
|
||||
gTypeNames[outType], gSaturationNames[sat],
|
||||
gRoundingModeNames[round], gTypeNames[inType]);
|
||||
vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
|
||||
"off.\n");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip the implicit converts if the rounding mode is
|
||||
// not default or test is saturated
|
||||
if (0 == startMinVectorSize)
|
||||
@@ -517,7 +612,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||
gMinVectorSize = 0;
|
||||
}
|
||||
|
||||
if ((error = DoTest<InType, OutType>(outType, inType, sat, round)))
|
||||
if ((error = DoTest<InType, OutType, InFP, OutFP>(outType, inType,
|
||||
sat, round)))
|
||||
{
|
||||
vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
|
||||
"FAILED ** \n",
|
||||
@@ -529,8 +625,7 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
RoundingMode round)
|
||||
{
|
||||
@@ -541,7 +636,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
cl_uint threads = GetThreadCount();
|
||||
|
||||
DataInitInfo info = { 0, 0, outType, inType, sat, round, threads };
|
||||
DataInfoSpec<InType, OutType> init_info(info);
|
||||
DataInfoSpec<InType, OutType, InFP, OutFP> init_info(info);
|
||||
WriteInputBufferInfo writeInputBufferInfo;
|
||||
int vectorSize;
|
||||
int error = 0;
|
||||
@@ -564,7 +659,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
|
||||
{
|
||||
writeInputBufferInfo.calcInfo[vectorSize].reset(
|
||||
new CalcRefValsPat<InType, OutType>());
|
||||
new CalcRefValsPat<InType, OutType, InFP, OutFP>());
|
||||
writeInputBufferInfo.calcInfo[vectorSize]->program =
|
||||
conv_test::MakeProgram(
|
||||
outType, inType, sat, round, vectorSize,
|
||||
@@ -597,6 +692,11 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
if (round == kDefaultRoundingMode && gIsRTZ)
|
||||
init_info.round = round = kRoundTowardZero;
|
||||
}
|
||||
else if (std::is_same<OutType, cl_half>::value && OutFP)
|
||||
{
|
||||
if (round == kDefaultRoundingMode && gIsHalfRTZ)
|
||||
init_info.round = round = kRoundTowardZero;
|
||||
}
|
||||
|
||||
// Figure out how many elements are in a work block
|
||||
// we handle 64-bit types a bit differently.
|
||||
@@ -764,6 +864,10 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
vlog("Input value: 0x%8.8x ",
|
||||
((unsigned int *)gIn)[error - 1]);
|
||||
break;
|
||||
case khalf:
|
||||
vlog("Input value: %a ",
|
||||
HTF(((cl_half *)gIn)[error - 1]));
|
||||
break;
|
||||
case kfloat:
|
||||
vlog("Input value: %a ", ((float *)gIn)[error - 1]);
|
||||
break;
|
||||
@@ -901,16 +1005,6 @@ double SubtractTime(uint64_t endTime, uint64_t startTime)
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count)
|
||||
{
|
||||
cl_uint i;
|
||||
for (i = 0; i < count; ++i)
|
||||
allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0);
|
||||
}
|
||||
|
||||
|
||||
void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &ptr);
|
||||
|
||||
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||
@@ -951,6 +1045,112 @@ void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
|
||||
// destroyed automatically soon after we exit.
|
||||
}
|
||||
|
||||
template <typename T> static bool isnan_fp(const T &v)
|
||||
{
|
||||
if (std::is_same<T, cl_half>::value)
|
||||
{
|
||||
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
||||
return (h_exp == 0x1F && h_mant != 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if !defined(_WIN32)
|
||||
return std::isnan(v);
|
||||
#else
|
||||
return _isnan(v);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InType>
|
||||
void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType)
|
||||
{
|
||||
InType *inp = (InType *)gIn;
|
||||
for (auto j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan_fp<InType>(inp[j]))
|
||||
memset((char *)mapped + j * gTypeSizes[outType], 0,
|
||||
gTypeSizes[outType]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
void FixNanToFltConversions(InType *inp, OutType *outp, cl_uint count)
|
||||
{
|
||||
if (std::is_same<OutType, cl_half>::value)
|
||||
{
|
||||
for (auto j = 0; j < count; j++)
|
||||
if (isnan_fp(inp[j]) && isnan_fp(outp[j]))
|
||||
outp[j] = 0x7e00; // HALF_NAN
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto j = 0; j < count; j++)
|
||||
if (isnan_fp(inp[j]) && isnan_fp(outp[j])) outp[j] = NAN;
|
||||
}
|
||||
}
|
||||
|
||||
// Normalises results that come from NaN inputs.
//
// outType: type of the elements stored in `d`.
// inType:  type of the input elements, which are read from the global gIn.
// d:       result buffer, patched in place.
// count:   number of elements.
//
// If the output type is not float/double/half, results for NaN inputs are
// zeroed. If both types are floating point and differ, NaN results are
// replaced with a single fixed NaN. All other combinations leave `d`
// untouched.
void FixNanConversions(Type outType, Type inType, void *d, cl_uint count)
{
    const bool outIsFloating =
        (outType == kfloat || outType == kdouble || outType == khalf);

    if (!outIsFloating)
    {
        // Non-floating-point destination: zero results for NaN inputs.
        switch (inType)
        {
            case kfloat: ZeroNanToIntCases<float>(count, d, outType); break;
            case kdouble: ZeroNanToIntCases<double>(count, d, outType); break;
            case khalf: ZeroNanToIntCases<cl_half>(count, d, outType); break;
            default: break;
        }
        return;
    }

    // Floating-point destination: only a floating-point source of a
    // different type needs patching.
    switch (inType)
    {
        case kfloat: {
            float *src = (float *)gIn;
            if (outType == kdouble)
                FixNanToFltConversions(src, (double *)d, count);
            else if (outType == khalf)
                FixNanToFltConversions(src, (cl_half *)d, count);
            break;
        }
        case kdouble: {
            double *src = (double *)gIn;
            if (outType == kfloat)
                FixNanToFltConversions(src, (float *)d, count);
            else if (outType == khalf)
                FixNanToFltConversions(src, (cl_half *)d, count);
            break;
        }
        case khalf: {
            cl_half *src = (cl_half *)gIn;
            if (outType == kfloat)
                FixNanToFltConversions(src, (float *)d, count);
            else if (outType == kdouble)
                FixNanToFltConversions(src, (double *)d, count);
            break;
        }
        default: break;
    }
}
|
||||
|
||||
|
||||
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||
void *data)
|
||||
@@ -963,7 +1163,6 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||
Type outType =
|
||||
info->parent->outType; // the data type of the conversion result
|
||||
Type inType = info->parent->inType; // the data type of the conversion input
|
||||
size_t j;
|
||||
cl_int error;
|
||||
cl_event doneBarrier = info->parent->doneBarrier;
|
||||
|
||||
@@ -985,51 +1184,7 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||
|
||||
// Patch up NaNs conversions to integer to zero -- these can be converted to
|
||||
// any integer
|
||||
if (outType != kfloat && outType != kdouble)
|
||||
{
|
||||
if (inType == kfloat)
|
||||
{
|
||||
float *inp = (float *)gIn;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]))
|
||||
memset((char *)mapped + j * gTypeSizes[outType], 0,
|
||||
gTypeSizes[outType]);
|
||||
}
|
||||
}
|
||||
if (inType == kdouble)
|
||||
{
|
||||
double *inp = (double *)gIn;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]))
|
||||
memset((char *)mapped + j * gTypeSizes[outType], 0,
|
||||
gTypeSizes[outType]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (inType == kfloat || inType == kdouble)
|
||||
{ // outtype and intype is float or double. NaN conversions for float <->
|
||||
// double can be any NaN
|
||||
if (inType == kfloat && outType == kdouble)
|
||||
{
|
||||
float *inp = (float *)gIn;
|
||||
double *outp = (double *)mapped;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
|
||||
}
|
||||
}
|
||||
if (inType == kdouble && outType == kfloat)
|
||||
{
|
||||
double *inp = (double *)gIn;
|
||||
float *outp = (float *)mapped;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
|
||||
}
|
||||
}
|
||||
}
|
||||
FixNanConversions(outType, inType, mapped, count);
|
||||
|
||||
if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
|
||||
info->result =
|
||||
@@ -1077,12 +1232,8 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||
// CalcReferenceValuesComplete exit.
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
namespace conv_test {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
{
|
||||
DataInitBase *info = (DataInitBase *)p;
|
||||
@@ -1092,8 +1243,6 @@ cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
{
|
||||
DataInitBase *info = (DataInitBase *)p;
|
||||
@@ -1102,7 +1251,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
Type inType = info->inType;
|
||||
Type outType = info->outType;
|
||||
RoundingMode round = info->round;
|
||||
size_t j;
|
||||
|
||||
Force64BitFPUPrecision();
|
||||
|
||||
@@ -1110,7 +1258,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
void *a = (cl_uchar *)gAllowZ + job_id * count;
|
||||
void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
|
||||
|
||||
|
||||
if (outType != inType)
|
||||
{
|
||||
// create the reference while we wait
|
||||
@@ -1144,7 +1291,33 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
qcom_sat = info->sat;
|
||||
#endif
|
||||
|
||||
RoundingMode oldRound = set_round(round, outType);
|
||||
RoundingMode oldRound;
|
||||
if (outType == khalf)
|
||||
{
|
||||
oldRound = set_round(kRoundToNearestEven, kfloat);
|
||||
switch (round)
|
||||
{
|
||||
default:
|
||||
case kDefaultRoundingMode:
|
||||
DataInitInfo::halfRoundingMode =
|
||||
ConversionsTest::defaultHalfRoundingMode;
|
||||
break;
|
||||
case kRoundToNearestEven:
|
||||
DataInitInfo::halfRoundingMode = CL_HALF_RTE;
|
||||
break;
|
||||
case kRoundUp:
|
||||
DataInitInfo::halfRoundingMode = CL_HALF_RTP;
|
||||
break;
|
||||
case kRoundDown:
|
||||
DataInitInfo::halfRoundingMode = CL_HALF_RTN;
|
||||
break;
|
||||
case kRoundTowardZero:
|
||||
DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
oldRound = set_round(round, outType);
|
||||
|
||||
if (info->sat)
|
||||
info->conv_array_sat(d, s, count);
|
||||
@@ -1156,10 +1329,13 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
// Decide if we allow a zero result in addition to the correctly rounded
|
||||
// one
|
||||
memset(a, 0, count);
|
||||
if (gForceFTZ)
|
||||
if (gForceFTZ && (inType == kfloat || outType == kfloat))
|
||||
{
|
||||
if (inType == kfloat || outType == kfloat)
|
||||
setAllowZ((uint8_t *)a, (uint32_t *)s, count);
|
||||
info->set_allow_zero_array((uint8_t *)a, d, s, count);
|
||||
}
|
||||
if (gForceHalfFTZ && (inType == khalf || outType == khalf))
|
||||
{
|
||||
info->set_allow_zero_array((uint8_t *)a, d, s, count);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -1170,55 +1346,11 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
|
||||
// Patch up NaNs conversions to integer to zero -- these can be converted to
|
||||
// any integer
|
||||
if (info->outType != kfloat && info->outType != kdouble)
|
||||
{
|
||||
if (inType == kfloat)
|
||||
{
|
||||
float *inp = (float *)s;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]))
|
||||
memset((char *)d + j * gTypeSizes[outType], 0,
|
||||
gTypeSizes[outType]);
|
||||
}
|
||||
}
|
||||
if (inType == kdouble)
|
||||
{
|
||||
double *inp = (double *)s;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]))
|
||||
memset((char *)d + j * gTypeSizes[outType], 0,
|
||||
gTypeSizes[outType]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (inType == kfloat || inType == kdouble)
|
||||
{ // outtype and intype is float or double. NaN conversions for float <->
|
||||
// double can be any NaN
|
||||
if (inType == kfloat && outType == kdouble)
|
||||
{
|
||||
float *inp = (float *)s;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j])) ((double *)d)[j] = NAN;
|
||||
}
|
||||
}
|
||||
if (inType == kdouble && outType == kfloat)
|
||||
{
|
||||
double *inp = (double *)s;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j])) ((float *)d)[j] = NAN;
|
||||
}
|
||||
}
|
||||
}
|
||||
FixNanConversions(outType, inType, d, count);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
uint64_t GetTime(void)
|
||||
{
|
||||
#if defined(__APPLE__)
|
||||
@@ -1233,8 +1365,6 @@ uint64_t GetTime(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Note: not called reentrantly
|
||||
void WriteInputBufferComplete(void *data)
|
||||
{
|
||||
@@ -1295,8 +1425,6 @@ void WriteInputBufferComplete(void *data)
|
||||
// automatically soon after we exit.
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
|
||||
RoundingMode round, int vectorSize, cl_kernel *outKernel)
|
||||
{
|
||||
@@ -1308,6 +1436,9 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
|
||||
if (outType == kdouble || inType == kdouble)
|
||||
source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||
|
||||
if (outType == khalf || inType == khalf)
|
||||
source << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
|
||||
|
||||
// Create the program. This is a bit complicated because we are trying to
|
||||
// avoid byte and short stores.
|
||||
if (0 == vectorSize)
|
||||
@@ -1408,7 +1539,7 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
|
||||
*outKernel = NULL;
|
||||
|
||||
const char *flags = NULL;
|
||||
if (gForceFTZ) flags = "-cl-denorms-are-zero";
|
||||
if (gForceFTZ || gForceHalfFTZ) flags = "-cl-denorms-are-zero";
|
||||
|
||||
// build it
|
||||
std::string sourceString = source.str();
|
||||
|
||||
Reference in New Issue
Block a user