mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Fp16 conversions staging (#1864)
* Added unification of existing conversions test as preparation for cl_khr_fp16 adaptation * Unified initialization procedures for conversions test. * Completed unification of data structures to handle cl_khr_fp16 * Added support for selective launch of the test * Added half support for test_conversions, work in progress (issue #142, conversions) * Added more work on halfs support for conversions test (issue #142, conversions) * Added cosmetic corrections * Added more cosmetic corrections before opening draft PR * Added corrections related to pre-submit windows build * Added more pre-build related corrections * Added pre-submit ubuntu build related correction * Added more pre-submit related corrections * Divided structures into separate source files (issue #142, conversions) * Added more corrections related to presubmit check * Removed redeclarations due to presubmit check * Added more corrections related to presubmit check arm build * Added cosmetic correction * Adapted modifications from related PR #1719 to avoid merging conflicts * fixed clang format * Added corrections related to code review (cl_khr_fp16 support according to issue #142) * Corrections related to macos CI check fail * fix for unclear clang format discrepancy * More corrections related to code review (cl_khr_fp16 for conversions #142) --------- Co-authored-by: Ewan Crawford <ewan@codeplay.com>
This commit is contained in:
@@ -42,10 +42,11 @@ typedef enum
|
||||
kshort = 3,
|
||||
kuint = 4,
|
||||
kint = 5,
|
||||
kfloat = 6,
|
||||
kdouble = 7,
|
||||
kulong = 8,
|
||||
klong = 9,
|
||||
khalf = 6,
|
||||
kfloat = 7,
|
||||
kdouble = 8,
|
||||
kulong = 9,
|
||||
klong = 10,
|
||||
|
||||
// This goes last
|
||||
kTypeCount
|
||||
|
||||
@@ -48,6 +48,7 @@
|
||||
|
||||
#include <vector>
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
|
||||
#include "basic_test_conversions.h"
|
||||
|
||||
@@ -86,9 +87,13 @@ int gWimpyReductionFactor = 128;
|
||||
int gSkipTesting = 0;
|
||||
int gForceFTZ = 0;
|
||||
int gIsRTZ = 0;
|
||||
int gForceHalfFTZ = 0;
|
||||
int gIsHalfRTZ = 0;
|
||||
uint32_t gSimdSize = 1;
|
||||
int gHasDouble = 0;
|
||||
int gTestDouble = 1;
|
||||
int gHasHalfs = 0;
|
||||
int gTestHalfs = 1;
|
||||
const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
|
||||
int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
|
||||
int gMinVectorSize = 0;
|
||||
@@ -100,6 +105,8 @@ int argCount = 0;
|
||||
|
||||
double SubtractTime(uint64_t endTime, uint64_t startTime);
|
||||
|
||||
cl_half_rounding_mode DataInitInfo::halfRoundingMode = CL_HALF_RTE;
|
||||
cl_half_rounding_mode ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE;
|
||||
|
||||
// clang-format off
|
||||
// for readability sake keep this section unformatted
|
||||
@@ -256,8 +263,30 @@ std::vector<double> DataInitInfo::specialValuesDouble = {
|
||||
MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30),
|
||||
MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
std::vector<cl_half> DataInitInfo::specialValuesHalf = {
|
||||
0xffff,
|
||||
0x0000,
|
||||
0x0001,
|
||||
0x7c00, /*INFINITY*/
|
||||
0xfc00, /*-INFINITY*/
|
||||
0x8000, /*-0*/
|
||||
0x7bff, /*HALF_MAX*/
|
||||
0x0400, /*HALF_MIN*/
|
||||
0x03ff, /* Largest denormal */
|
||||
0x3c00, /* 1 */
|
||||
0xbc00, /* -1 */
|
||||
0x3555, /*nearest value to 1/3*/
|
||||
0x3bff, /*largest number less than one*/
|
||||
0xc000, /* -2 */
|
||||
0xfbff, /* -HALF_MAX */
|
||||
0x8400, /* -HALF_MIN */
|
||||
0x4248, /* M_PI_H */
|
||||
0xc248, /* -M_PI_H */
|
||||
0xbbff, /* Largest negative fraction */
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
||||
// (that's x87 default state). This causes problems with the tests that
|
||||
@@ -282,15 +311,32 @@ static inline void Force64BitFPUPrecision(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
int CalcRefValsPat<InType, OutType>::check_result(void *test, uint32_t count,
|
||||
int vectorSize)
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
int CalcRefValsPat<InType, OutType, InFP, OutFP>::check_result(void *test,
|
||||
uint32_t count,
|
||||
int vectorSize)
|
||||
{
|
||||
const cl_uchar *a = (const cl_uchar *)gAllowZ;
|
||||
|
||||
if (std::is_integral<OutType>::value)
|
||||
{ // char/uchar/short/ushort/int/uint/long/ulong
|
||||
if (is_half<OutType, OutFP>())
|
||||
{
|
||||
const cl_half *t = (const cl_half *)test;
|
||||
const cl_half *c = (const cl_half *)gRef;
|
||||
|
||||
for (uint32_t i = 0; i < count; i++)
|
||||
if (t[i] != c[i] &&
|
||||
// Allow nan's to be binary different
|
||||
!((t[i] & 0x7fff) > 0x7C00 && (c[i] & 0x7fff) > 0x7C00)
|
||||
&& !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x8000)))
|
||||
{
|
||||
vlog(
|
||||
"\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
|
||||
vectorSize, i, HTF(c[i]), HTF(t[i]));
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
else if (std::is_integral<OutType>::value)
|
||||
{ // char/uchar/short/ushort/half/int/uint/long/ulong
|
||||
const OutType *t = (const OutType *)test;
|
||||
const OutType *c = (const OutType *)gRef;
|
||||
for (uint32_t i = 0; i < count; i++)
|
||||
@@ -388,6 +434,20 @@ cl_int CustomConversionsTest::Run()
|
||||
continue;
|
||||
}
|
||||
|
||||
// skip half if we don't have it
|
||||
if (!gTestHalfs && (inType == khalf || outType == khalf))
|
||||
{
|
||||
if (gHasHalfs)
|
||||
{
|
||||
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
|
||||
gTypeNames[outType], gSaturationNames[sat],
|
||||
gRoundingModeNames[round], gTypeNames[inType]);
|
||||
vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
|
||||
"off.\n");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// skip longs on embedded
|
||||
if (!gHasLong
|
||||
&& (inType == klong || outType == klong || inType == kulong
|
||||
@@ -427,8 +487,8 @@ ConversionsTest::ConversionsTest(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: context(context), device(device), queue(queue), num_elements(0),
|
||||
typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0),
|
||||
cl_uint(0), cl_int(0), cl_float(0), cl_double(0),
|
||||
cl_ulong(0), cl_long(0) })
|
||||
cl_uint(0), cl_int(0), cl_half(0), cl_float(0),
|
||||
cl_double(0), cl_ulong(0), cl_long(0) })
|
||||
{}
|
||||
|
||||
|
||||
@@ -445,11 +505,31 @@ cl_int ConversionsTest::Run()
|
||||
// Stores the requested element count and, when the device exposes
// cl_khr_fp16, selects the default half rounding mode used by the host-side
// reference conversions.
//
// The device's half FP config is queried once: round-to-nearest wins when it
// is reported, round-to-zero is the fallback. If neither bit is present the
// failure is logged and TEST_FAIL is returned. Devices without cl_khr_fp16
// leave both rounding-mode statics untouched.
cl_int ConversionsTest::SetUp(int elements)
{
    num_elements = elements;

    // Nothing half-specific to configure without the extension.
    if (!is_extension_available(device, "cl_khr_fp16")) return CL_SUCCESS;

    const cl_device_fp_config halfConfig =
        get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);

    cl_half_rounding_mode deviceDefault;
    if ((halfConfig & CL_FP_ROUND_TO_NEAREST) != 0)
    {
        deviceDefault = CL_HALF_RTE;
    }
    else if ((halfConfig & CL_FP_ROUND_TO_ZERO) != 0)
    {
        deviceDefault = CL_HALF_RTZ;
    }
    else
    {
        log_error("Error while acquiring half rounding mode");
        return TEST_FAIL;
    }

    // Both statics always start out in agreement with the device default.
    DataInitInfo::halfRoundingMode = deviceDefault;
    ConversionsTest::defaultHalfRoundingMode = deviceDefault;

    return CL_SUCCESS;
}
|
||||
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||
const Type &outType, int &testNumber,
|
||||
int startMinVectorSize)
|
||||
@@ -470,7 +550,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||
sat = (SaturationMode)(sat + 1))
|
||||
{
|
||||
// skip illegal saturated conversions to float type
|
||||
if (kSaturated == sat && (outType == kfloat || outType == kdouble))
|
||||
if (kSaturated == sat
|
||||
&& (outType == kfloat || outType == kdouble || outType == khalf))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@@ -507,6 +588,20 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||
continue;
|
||||
}
|
||||
|
||||
// skip half if we don't have it
|
||||
if (!gTestHalfs && (inType == khalf || outType == khalf))
|
||||
{
|
||||
if (gHasHalfs)
|
||||
{
|
||||
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
|
||||
gTypeNames[outType], gSaturationNames[sat],
|
||||
gRoundingModeNames[round], gTypeNames[inType]);
|
||||
vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
|
||||
"off.\n");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip the implicit converts if the rounding mode is
|
||||
// not default or test is saturated
|
||||
if (0 == startMinVectorSize)
|
||||
@@ -517,7 +612,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||
gMinVectorSize = 0;
|
||||
}
|
||||
|
||||
if ((error = DoTest<InType, OutType>(outType, inType, sat, round)))
|
||||
if ((error = DoTest<InType, OutType, InFP, OutFP>(outType, inType,
|
||||
sat, round)))
|
||||
{
|
||||
vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
|
||||
"FAILED ** \n",
|
||||
@@ -529,8 +625,7 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
RoundingMode round)
|
||||
{
|
||||
@@ -541,7 +636,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
cl_uint threads = GetThreadCount();
|
||||
|
||||
DataInitInfo info = { 0, 0, outType, inType, sat, round, threads };
|
||||
DataInfoSpec<InType, OutType> init_info(info);
|
||||
DataInfoSpec<InType, OutType, InFP, OutFP> init_info(info);
|
||||
WriteInputBufferInfo writeInputBufferInfo;
|
||||
int vectorSize;
|
||||
int error = 0;
|
||||
@@ -564,7 +659,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
|
||||
{
|
||||
writeInputBufferInfo.calcInfo[vectorSize].reset(
|
||||
new CalcRefValsPat<InType, OutType>());
|
||||
new CalcRefValsPat<InType, OutType, InFP, OutFP>());
|
||||
writeInputBufferInfo.calcInfo[vectorSize]->program =
|
||||
conv_test::MakeProgram(
|
||||
outType, inType, sat, round, vectorSize,
|
||||
@@ -597,6 +692,11 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
if (round == kDefaultRoundingMode && gIsRTZ)
|
||||
init_info.round = round = kRoundTowardZero;
|
||||
}
|
||||
else if (std::is_same<OutType, cl_half>::value && OutFP)
|
||||
{
|
||||
if (round == kDefaultRoundingMode && gIsHalfRTZ)
|
||||
init_info.round = round = kRoundTowardZero;
|
||||
}
|
||||
|
||||
// Figure out how many elements are in a work block
|
||||
// we handle 64-bit types a bit differently.
|
||||
@@ -764,6 +864,10 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
vlog("Input value: 0x%8.8x ",
|
||||
((unsigned int *)gIn)[error - 1]);
|
||||
break;
|
||||
case khalf:
|
||||
vlog("Input value: %a ",
|
||||
HTF(((cl_half *)gIn)[error - 1]));
|
||||
break;
|
||||
case kfloat:
|
||||
vlog("Input value: %a ", ((float *)gIn)[error - 1]);
|
||||
break;
|
||||
@@ -901,8 +1005,6 @@ double SubtractTime(uint64_t endTime, uint64_t startTime)
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count)
|
||||
{
|
||||
cl_uint i;
|
||||
@@ -951,6 +1053,112 @@ void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
|
||||
// destroyed automatically soon after we exit.
|
||||
}
|
||||
|
||||
template <typename T> static bool isnan_fp(const T &v)
|
||||
{
|
||||
if (std::is_same<T, cl_half>::value)
|
||||
{
|
||||
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
||||
return (h_exp == 0x1F && h_mant != 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if !defined(_WIN32)
|
||||
return std::isnan(v);
|
||||
#else
|
||||
return _isnan(v);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InType>
|
||||
void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType)
|
||||
{
|
||||
InType *inp = (InType *)gIn;
|
||||
for (auto j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan_fp<InType>(inp[j]))
|
||||
memset((char *)mapped + j * gTypeSizes[outType], 0,
|
||||
gTypeSizes[outType]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
void FixNanToFltConversions(InType *inp, OutType *outp, cl_uint count)
|
||||
{
|
||||
if (std::is_same<OutType, cl_half>::value)
|
||||
{
|
||||
for (auto j = 0; j < count; j++)
|
||||
if (isnan_fp(inp[j]) && isnan_fp(outp[j]))
|
||||
outp[j] = 0x7e00; // HALF_NAN
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto j = 0; j < count; j++)
|
||||
if (isnan_fp(inp[j]) && isnan_fp(outp[j])) outp[j] = NAN;
|
||||
}
|
||||
}
|
||||
|
||||
void FixNanConversions(Type outType, Type inType, void *d, cl_uint count)
|
||||
{
|
||||
if (outType != kfloat && outType != kdouble && outType != khalf)
|
||||
{
|
||||
if (inType == kfloat)
|
||||
ZeroNanToIntCases<float>(count, d, outType);
|
||||
else if (inType == kdouble)
|
||||
ZeroNanToIntCases<double>(count, d, outType);
|
||||
else if (inType == khalf)
|
||||
ZeroNanToIntCases<cl_half>(count, d, outType);
|
||||
}
|
||||
else if (inType == kfloat || inType == kdouble || inType == khalf)
|
||||
{
|
||||
// outtype and intype is float or double or half. NaN conversions for
|
||||
// float/double/half could be any NaN
|
||||
if (inType == kfloat)
|
||||
{
|
||||
float *inp = (float *)gIn;
|
||||
if (outType == kdouble)
|
||||
{
|
||||
double *outp = (double *)d;
|
||||
FixNanToFltConversions(inp, outp, count);
|
||||
}
|
||||
else if (outType == khalf)
|
||||
{
|
||||
cl_half *outp = (cl_half *)d;
|
||||
FixNanToFltConversions(inp, outp, count);
|
||||
}
|
||||
}
|
||||
else if (inType == kdouble)
|
||||
{
|
||||
double *inp = (double *)gIn;
|
||||
if (outType == kfloat)
|
||||
{
|
||||
float *outp = (float *)d;
|
||||
FixNanToFltConversions(inp, outp, count);
|
||||
}
|
||||
else if (outType == khalf)
|
||||
{
|
||||
cl_half *outp = (cl_half *)d;
|
||||
FixNanToFltConversions(inp, outp, count);
|
||||
}
|
||||
}
|
||||
else if (inType == khalf)
|
||||
{
|
||||
cl_half *inp = (cl_half *)gIn;
|
||||
if (outType == kfloat)
|
||||
{
|
||||
float *outp = (float *)d;
|
||||
FixNanToFltConversions(inp, outp, count);
|
||||
}
|
||||
else if (outType == kdouble)
|
||||
{
|
||||
double *outp = (double *)d;
|
||||
FixNanToFltConversions(inp, outp, count);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||
void *data)
|
||||
@@ -963,7 +1171,6 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||
Type outType =
|
||||
info->parent->outType; // the data type of the conversion result
|
||||
Type inType = info->parent->inType; // the data type of the conversion input
|
||||
size_t j;
|
||||
cl_int error;
|
||||
cl_event doneBarrier = info->parent->doneBarrier;
|
||||
|
||||
@@ -985,51 +1192,7 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||
|
||||
// Patch up NaNs conversions to integer to zero -- these can be converted to
|
||||
// any integer
|
||||
if (outType != kfloat && outType != kdouble)
|
||||
{
|
||||
if (inType == kfloat)
|
||||
{
|
||||
float *inp = (float *)gIn;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]))
|
||||
memset((char *)mapped + j * gTypeSizes[outType], 0,
|
||||
gTypeSizes[outType]);
|
||||
}
|
||||
}
|
||||
if (inType == kdouble)
|
||||
{
|
||||
double *inp = (double *)gIn;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]))
|
||||
memset((char *)mapped + j * gTypeSizes[outType], 0,
|
||||
gTypeSizes[outType]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (inType == kfloat || inType == kdouble)
|
||||
{ // outtype and intype is float or double. NaN conversions for float <->
|
||||
// double can be any NaN
|
||||
if (inType == kfloat && outType == kdouble)
|
||||
{
|
||||
float *inp = (float *)gIn;
|
||||
double *outp = (double *)mapped;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
|
||||
}
|
||||
}
|
||||
if (inType == kdouble && outType == kfloat)
|
||||
{
|
||||
double *inp = (double *)gIn;
|
||||
float *outp = (float *)mapped;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
|
||||
}
|
||||
}
|
||||
}
|
||||
FixNanConversions(outType, inType, mapped, count);
|
||||
|
||||
if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
|
||||
info->result =
|
||||
@@ -1077,12 +1240,8 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
|
||||
// CalcReferenceValuesComplete exit.
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
namespace conv_test {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
{
|
||||
DataInitBase *info = (DataInitBase *)p;
|
||||
@@ -1092,8 +1251,6 @@ cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
{
|
||||
DataInitBase *info = (DataInitBase *)p;
|
||||
@@ -1102,7 +1259,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
Type inType = info->inType;
|
||||
Type outType = info->outType;
|
||||
RoundingMode round = info->round;
|
||||
size_t j;
|
||||
|
||||
Force64BitFPUPrecision();
|
||||
|
||||
@@ -1110,7 +1266,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
void *a = (cl_uchar *)gAllowZ + job_id * count;
|
||||
void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
|
||||
|
||||
|
||||
if (outType != inType)
|
||||
{
|
||||
// create the reference while we wait
|
||||
@@ -1144,7 +1299,33 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
qcom_sat = info->sat;
|
||||
#endif
|
||||
|
||||
RoundingMode oldRound = set_round(round, outType);
|
||||
RoundingMode oldRound;
|
||||
if (outType == khalf)
|
||||
{
|
||||
oldRound = set_round(kRoundToNearestEven, kfloat);
|
||||
switch (round)
|
||||
{
|
||||
default:
|
||||
case kDefaultRoundingMode:
|
||||
DataInitInfo::halfRoundingMode =
|
||||
ConversionsTest::defaultHalfRoundingMode;
|
||||
break;
|
||||
case kRoundToNearestEven:
|
||||
DataInitInfo::halfRoundingMode = CL_HALF_RTE;
|
||||
break;
|
||||
case kRoundUp:
|
||||
DataInitInfo::halfRoundingMode = CL_HALF_RTP;
|
||||
break;
|
||||
case kRoundDown:
|
||||
DataInitInfo::halfRoundingMode = CL_HALF_RTN;
|
||||
break;
|
||||
case kRoundTowardZero:
|
||||
DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
oldRound = set_round(round, outType);
|
||||
|
||||
if (info->sat)
|
||||
info->conv_array_sat(d, s, count);
|
||||
@@ -1161,6 +1342,11 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
if (inType == kfloat || outType == kfloat)
|
||||
setAllowZ((uint8_t *)a, (uint32_t *)s, count);
|
||||
}
|
||||
if (gForceHalfFTZ)
|
||||
{
|
||||
if (inType == khalf || outType == khalf)
|
||||
setAllowZ((uint8_t *)a, (uint32_t *)s, count);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1170,55 +1356,11 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
|
||||
|
||||
// Patch up NaNs conversions to integer to zero -- these can be converted to
|
||||
// any integer
|
||||
if (info->outType != kfloat && info->outType != kdouble)
|
||||
{
|
||||
if (inType == kfloat)
|
||||
{
|
||||
float *inp = (float *)s;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]))
|
||||
memset((char *)d + j * gTypeSizes[outType], 0,
|
||||
gTypeSizes[outType]);
|
||||
}
|
||||
}
|
||||
if (inType == kdouble)
|
||||
{
|
||||
double *inp = (double *)s;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j]))
|
||||
memset((char *)d + j * gTypeSizes[outType], 0,
|
||||
gTypeSizes[outType]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (inType == kfloat || inType == kdouble)
|
||||
{ // outtype and intype is float or double. NaN conversions for float <->
|
||||
// double can be any NaN
|
||||
if (inType == kfloat && outType == kdouble)
|
||||
{
|
||||
float *inp = (float *)s;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j])) ((double *)d)[j] = NAN;
|
||||
}
|
||||
}
|
||||
if (inType == kdouble && outType == kfloat)
|
||||
{
|
||||
double *inp = (double *)s;
|
||||
for (j = 0; j < count; j++)
|
||||
{
|
||||
if (isnan(inp[j])) ((float *)d)[j] = NAN;
|
||||
}
|
||||
}
|
||||
}
|
||||
FixNanConversions(outType, inType, d, count);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
uint64_t GetTime(void)
|
||||
{
|
||||
#if defined(__APPLE__)
|
||||
@@ -1233,8 +1375,6 @@ uint64_t GetTime(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Note: not called reentrantly
|
||||
void WriteInputBufferComplete(void *data)
|
||||
{
|
||||
@@ -1295,8 +1435,6 @@ void WriteInputBufferComplete(void *data)
|
||||
// automatically soon after we exit.
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
|
||||
RoundingMode round, int vectorSize, cl_kernel *outKernel)
|
||||
{
|
||||
@@ -1308,6 +1446,9 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
|
||||
if (outType == kdouble || inType == kdouble)
|
||||
source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||
|
||||
if (outType == khalf || inType == khalf)
|
||||
source << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
|
||||
|
||||
// Create the program. This is a bit complicated because we are trying to
|
||||
// avoid byte and short stores.
|
||||
if (0 == vectorSize)
|
||||
@@ -1408,7 +1549,7 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
|
||||
*outKernel = NULL;
|
||||
|
||||
const char *flags = NULL;
|
||||
if (gForceFTZ) flags = "-cl-denorms-are-zero";
|
||||
if (gForceFTZ || gForceHalfFTZ) flags = "-cl-denorms-are-zero";
|
||||
|
||||
// build it
|
||||
std::string sourceString = source.str();
|
||||
|
||||
@@ -30,6 +30,8 @@
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
#include <CL/cl_half.h>
|
||||
|
||||
#include "harness/mt19937.h"
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
@@ -76,6 +78,8 @@ extern cl_mem gInBuffer;
|
||||
extern cl_mem gOutBuffers[];
|
||||
extern int gHasDouble;
|
||||
extern int gTestDouble;
|
||||
extern int gHasHalfs;
|
||||
extern int gTestHalfs;
|
||||
extern int gWimpyMode;
|
||||
extern int gWimpyReductionFactor;
|
||||
extern int gSkipTesting;
|
||||
@@ -87,6 +91,8 @@ extern int gReportAverageTimes;
|
||||
extern int gStartTestNumber;
|
||||
extern int gEndTestNumber;
|
||||
extern int gIsRTZ;
|
||||
extern int gForceHalfFTZ;
|
||||
extern int gIsHalfRTZ;
|
||||
extern void *gIn;
|
||||
extern void *gRef;
|
||||
extern void *gAllowZ;
|
||||
@@ -135,7 +141,7 @@ struct CalcRefValsBase
|
||||
cl_int result;
|
||||
};
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
struct CalcRefValsPat : CalcRefValsBase
|
||||
{
|
||||
int check_result(void *, uint32_t, int) override;
|
||||
@@ -162,8 +168,12 @@ struct WriteInputBufferInfo
|
||||
};
|
||||
|
||||
// Must be aligned with Type enums!
|
||||
using TypeIter = std::tuple<cl_uchar, cl_char, cl_ushort, cl_short, cl_uint,
|
||||
cl_int, cl_float, cl_double, cl_ulong, cl_long>;
|
||||
using TypeIter =
|
||||
std::tuple<cl_uchar, cl_char, cl_ushort, cl_short, cl_uint, cl_int, cl_half,
|
||||
cl_float, cl_double, cl_ulong, cl_long>;
|
||||
|
||||
// hardcoded solution needed due to typeid confusing cl_ushort/cl_half
|
||||
constexpr bool isTypeFp[] = { 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0 };
|
||||
|
||||
// Helper test fixture for constructing OpenCL objects used in testing
|
||||
// a variety of simple command-buffer enqueue scenarios.
|
||||
@@ -179,13 +189,13 @@ struct ConversionsTest
|
||||
// Test body returning an OpenCL error code
|
||||
cl_int Run();
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
int DoTest(Type outType, Type inType, SaturationMode sat,
|
||||
RoundingMode round);
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
void TestTypesConversion(const Type &inType, const Type &outType, int &tn,
|
||||
const int smvs);
|
||||
int startMinVectorSize);
|
||||
|
||||
protected:
|
||||
cl_context context;
|
||||
@@ -195,6 +205,9 @@ protected:
|
||||
size_t num_elements;
|
||||
|
||||
TypeIter typeIterator;
|
||||
|
||||
public:
|
||||
static cl_half_rounding_mode defaultHalfRoundingMode;
|
||||
};
|
||||
|
||||
struct CustomConversionsTest : ConversionsTest
|
||||
@@ -221,17 +234,18 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
|
||||
|
||||
struct TestType
|
||||
{
|
||||
template <typename T> bool testType(Type in)
|
||||
template <typename T, bool FP> bool testType(Type in)
|
||||
{
|
||||
switch (in)
|
||||
{
|
||||
default: return false;
|
||||
case kuchar: return std::is_same<cl_uchar, T>::value;
|
||||
case kchar: return std::is_same<cl_char, T>::value;
|
||||
case kushort: return std::is_same<cl_ushort, T>::value;
|
||||
case kushort: return std::is_same<cl_ushort, T>::value && !FP;
|
||||
case kshort: return std::is_same<cl_short, T>::value;
|
||||
case kuint: return std::is_same<cl_uint, T>::value;
|
||||
case kint: return std::is_same<cl_int, T>::value;
|
||||
case khalf: return std::is_same<cl_half, T>::value && FP;
|
||||
case kfloat: return std::is_same<cl_float, T>::value;
|
||||
case kdouble: return std::is_same<cl_double, T>::value;
|
||||
case kulong: return std::is_same<cl_ulong, T>::value;
|
||||
@@ -263,13 +277,15 @@ protected:
|
||||
typename InType>
|
||||
void iterate_in_type(const InType &t)
|
||||
{
|
||||
if (!testType<InType>(inType)) vlog_error("Unexpected data type!\n");
|
||||
if (!testType<InType, isTypeFp[In]>(inType))
|
||||
vlog_error("Unexpected data type!\n");
|
||||
|
||||
if (!testType<OutType>(outType)) vlog_error("Unexpected data type!\n");
|
||||
if (!testType<OutType, isTypeFp[Out]>(outType))
|
||||
vlog_error("Unexpected data type!\n");
|
||||
|
||||
// run the conversions
|
||||
test.TestTypesConversion<InType, OutType>(inType, outType, testNumber,
|
||||
startMinVectorSize);
|
||||
test.TestTypesConversion<InType, OutType, isTypeFp[In], isTypeFp[Out]>(
|
||||
inType, outType, testNumber, startMinVectorSize);
|
||||
inType = (Type)(inType + 1);
|
||||
}
|
||||
|
||||
@@ -337,11 +353,13 @@ protected:
|
||||
typename InType>
|
||||
void iterate_in_type(const InType &t)
|
||||
{
|
||||
if (testType<InType>(inType) && testType<OutType>(outType))
|
||||
if (testType<InType, isTypeFp[In]>(inType)
|
||||
&& testType<OutType, isTypeFp[Out]>(outType))
|
||||
{
|
||||
// run selected conversion
|
||||
// testing of the result will happen afterwards
|
||||
test.DoTest<InType, OutType>(outType, inType, saturation, rounding);
|
||||
test.DoTest<InType, OutType, isTypeFp[In], isTypeFp[Out]>(
|
||||
outType, inType, saturation, rounding);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -28,8 +28,11 @@ extern bool qcom_sat;
|
||||
extern roundingMode qcom_rm;
|
||||
#endif
|
||||
|
||||
#include <CL/cl_half.h>
|
||||
|
||||
#include "harness/mt19937.h"
|
||||
#include "harness/rounding_mode.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@@ -60,11 +63,17 @@ struct DataInitInfo
|
||||
RoundingMode round;
|
||||
cl_uint threads;
|
||||
|
||||
static cl_half_rounding_mode halfRoundingMode;
|
||||
static std::vector<uint32_t> specialValuesUInt;
|
||||
static std::vector<float> specialValuesFloat;
|
||||
static std::vector<double> specialValuesDouble;
|
||||
static std::vector<cl_half> specialValuesHalf;
|
||||
};
|
||||
|
||||
#define HFF(num) cl_half_from_float(num, DataInitInfo::halfRoundingMode)
|
||||
#define HTF(num) cl_half_to_float(num)
|
||||
#define HFD(num) cl_half_from_double(num, DataInitInfo::halfRoundingMode)
|
||||
|
||||
struct DataInitBase : public DataInitInfo
|
||||
{
|
||||
virtual ~DataInitBase() = default;
|
||||
@@ -75,7 +84,7 @@ struct DataInitBase : public DataInitInfo
|
||||
virtual void init(const cl_uint &, const cl_uint &) {}
|
||||
};
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
struct DataInfoSpec : public DataInitBase
|
||||
{
|
||||
explicit DataInfoSpec(const DataInitInfo &agg);
|
||||
@@ -98,6 +107,16 @@ struct DataInfoSpec : public DataInitBase
|
||||
|
||||
std::vector<MTdataHolder> mdv;
|
||||
|
||||
constexpr bool is_in_half() const
|
||||
{
|
||||
return (std::is_same<InType, cl_half>::value && InFP);
|
||||
}
|
||||
|
||||
constexpr bool is_out_half() const
|
||||
{
|
||||
return (std::is_same<OutType, cl_half>::value && OutFP);
|
||||
}
|
||||
|
||||
void conv_array(void *out, void *in, size_t n) override
|
||||
{
|
||||
for (size_t i = 0; i < n; i++)
|
||||
@@ -125,19 +144,22 @@ struct DataInfoSpec : public DataInitBase
|
||||
}
|
||||
};
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
DataInfoSpec<InType, OutType, InFP, OutFP>::DataInfoSpec(
|
||||
const DataInitInfo &agg)
|
||||
: DataInitBase(agg), mdv(0)
|
||||
{
|
||||
if (std::is_same<cl_float, OutType>::value)
|
||||
ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX);
|
||||
else if (std::is_same<cl_double, OutType>::value)
|
||||
ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX);
|
||||
else if (std::is_same<cl_half, OutType>::value && OutFP)
|
||||
ranges = std::make_pair(HFF(CL_HALF_MIN), HFF(CL_HALF_MAX));
|
||||
else if (std::is_same<cl_uchar, OutType>::value)
|
||||
ranges = std::make_pair(0, CL_UCHAR_MAX);
|
||||
else if (std::is_same<cl_char, OutType>::value)
|
||||
ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX);
|
||||
else if (std::is_same<cl_ushort, OutType>::value)
|
||||
else if (std::is_same<cl_ushort, OutType>::value && !OutFP)
|
||||
ranges = std::make_pair(0, CL_USHRT_MAX);
|
||||
else if (std::is_same<cl_short, OutType>::value)
|
||||
ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX);
|
||||
@@ -158,12 +180,12 @@ DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
|
||||
InType outMax = static_cast<InType>(ranges.second);
|
||||
|
||||
InType eps = std::is_same<InType, cl_float>::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON;
|
||||
if (std::is_integral<OutType>::value)
|
||||
if (std::is_integral<OutType>::value && !OutFP)
|
||||
{ // to char/uchar/short/ushort/int/uint/long/ulong
|
||||
if (sizeof(OutType)<=sizeof(cl_short))
|
||||
{ // to char/uchar/short/ushort
|
||||
clamp_ranges=
|
||||
{{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
|
||||
{{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
|
||||
{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
|
||||
{outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax-1.f},
|
||||
{outMin-0.0f, outMax - outMax * 0.5f * eps },
|
||||
@@ -249,11 +271,55 @@ DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (is_in_half())
|
||||
{
|
||||
float outMin = static_cast<float>(ranges.first);
|
||||
float outMax = static_cast<float>(ranges.second);
|
||||
float eps = CL_HALF_EPSILON;
|
||||
cl_half_rounding_mode prev_half_round = DataInitInfo::halfRoundingMode;
|
||||
DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
|
||||
|
||||
if (std::is_integral<OutType>::value)
|
||||
{ // to char/uchar/short/ushort/int/uint/long/ulong
|
||||
if (sizeof(OutType)<=sizeof(cl_char) || std::is_same<OutType, cl_short>::value)
|
||||
{ // to char/uchar/short
|
||||
clamp_ranges=
|
||||
{{HFF(outMin-0.5f), HFF(outMax + 0.5f - outMax * 0.5f * eps)},
|
||||
{HFF(outMin-0.5f), HFF(outMax + 0.5f - outMax * 0.5f * eps)},
|
||||
{HFF(outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps), HFF(outMax-1.f)},
|
||||
{HFF(outMin-0.0f), HFF(outMax - outMax * 0.5f * eps) },
|
||||
{HFF(outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps), HFF(outMax - outMax * 0.5f * eps)}};
|
||||
}
|
||||
else
|
||||
{ // to ushort/int/uint/long/ulong
|
||||
if (std::is_signed<OutType>::value)
|
||||
{
|
||||
clamp_ranges=
|
||||
{ {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
||||
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
||||
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
||||
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
||||
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)}};
|
||||
}
|
||||
else
|
||||
{
|
||||
clamp_ranges=
|
||||
{ {HFF(outMin), HFF(CL_HALF_MAX)},
|
||||
{HFF(outMin), HFF(CL_HALF_MAX)},
|
||||
{HFF(outMin), HFF(CL_HALF_MAX)},
|
||||
{HFF(outMin), HFF(CL_HALF_MAX)},
|
||||
{HFF(outMin), HFF(CL_HALF_MAX)}};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DataInitInfo::halfRoundingMode = prev_half_round;
|
||||
}
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
float DataInfoSpec<InType, OutType>::round_to_int(float f)
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
float DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int(float f)
|
||||
{
|
||||
static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23),
|
||||
-MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) };
|
||||
@@ -281,8 +347,9 @@ float DataInfoSpec<InType, OutType>::round_to_int(float f)
|
||||
return f;
|
||||
}
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
long long DataInfoSpec<InType, OutType>::round_to_int_and_clamp(double f)
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
long long
|
||||
DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int_and_clamp(double f)
|
||||
{
|
||||
static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52),
|
||||
MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
|
||||
@@ -313,8 +380,8 @@ long long DataInfoSpec<InType, OutType>::round_to_int_and_clamp(double f)
|
||||
return (long long)f;
|
||||
}
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
OutType DataInfoSpec<InType, OutType>::absolute(const OutType &x)
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
OutType DataInfoSpec<InType, OutType, InFP, OutFP>::absolute(const OutType &x)
|
||||
{
|
||||
union {
|
||||
cl_uint u;
|
||||
@@ -331,17 +398,30 @@ OutType DataInfoSpec<InType, OutType>::absolute(const OutType &x)
|
||||
return u.f;
|
||||
}
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
||||
template <typename T, bool fp> constexpr bool is_half()
|
||||
{
|
||||
if (std::is_same<cl_float, InType>::value)
|
||||
return (std::is_same<cl_half, T>::value && fp);
|
||||
}
|
||||
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
void DataInfoSpec<InType, OutType, InFP, OutFP>::conv(OutType *out, InType *in)
|
||||
{
|
||||
if (std::is_same<cl_float, InType>::value || is_in_half())
|
||||
{
|
||||
cl_float inVal = *in;
|
||||
if (std::is_same<cl_half, InType>::value)
|
||||
{
|
||||
inVal = HTF(*in);
|
||||
}
|
||||
|
||||
if (std::is_floating_point<OutType>::value)
|
||||
{
|
||||
*out = (OutType)inVal;
|
||||
}
|
||||
else if (is_out_half())
|
||||
{
|
||||
*out = HFF(*in);
|
||||
}
|
||||
else if (std::is_same<cl_ulong, OutType>::value)
|
||||
{
|
||||
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
||||
@@ -376,6 +456,8 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
||||
{
|
||||
if (std::is_same<cl_float, OutType>::value)
|
||||
*out = (OutType)*in;
|
||||
else if (is_out_half())
|
||||
*out = static_cast<OutType>(HFD(*in));
|
||||
else
|
||||
*out = rint(*in);
|
||||
}
|
||||
@@ -417,7 +499,7 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
||||
*out = (vi == 0 ? 0.0 : static_cast<OutType>(vi));
|
||||
#endif
|
||||
}
|
||||
else if (std::is_same<cl_float, OutType>::value)
|
||||
else if (std::is_same<cl_float, OutType>::value || is_out_half())
|
||||
{
|
||||
cl_float outVal = 0.f;
|
||||
|
||||
@@ -463,7 +545,9 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
*out = outVal;
|
||||
*out = std::is_same<cl_half, OutType>::value
|
||||
? static_cast<OutType>(HFF(outVal))
|
||||
: outVal;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -484,6 +568,8 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
||||
// Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
|
||||
*out = (*in == 0 ? 0.0 : *in);
|
||||
}
|
||||
else if (is_out_half())
|
||||
*out = static_cast<OutType>(HFF(*in == 0 ? 0.f : *in));
|
||||
else
|
||||
{
|
||||
*out = (OutType)*in;
|
||||
@@ -494,19 +580,26 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
|
||||
#define CLAMP(_lo, _x, _hi) \
|
||||
((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x)))
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
void DataInfoSpec<InType, OutType, InFP, OutFP>::conv_sat(OutType *out,
|
||||
InType *in)
|
||||
{
|
||||
if (std::is_floating_point<InType>::value)
|
||||
if (std::is_floating_point<InType>::value || is_in_half())
|
||||
{
|
||||
if (std::is_floating_point<OutType>::value)
|
||||
{ // in float/double, out float/double
|
||||
*out = (OutType)(*in);
|
||||
cl_float inVal = *in;
|
||||
if (is_in_half()) inVal = HTF(*in);
|
||||
|
||||
if (std::is_floating_point<OutType>::value || is_out_half())
|
||||
{ // in half/float/double, out half/float/double
|
||||
if (is_out_half())
|
||||
*out = static_cast<OutType>(HFF(inVal));
|
||||
else
|
||||
*out = (OutType)(is_in_half() ? inVal : *in);
|
||||
}
|
||||
else if ((std::is_same<InType, cl_float>::value)
|
||||
else if ((std::is_same<InType, cl_float>::value || is_in_half())
|
||||
&& std::is_same<cl_ulong, OutType>::value)
|
||||
{
|
||||
cl_float x = round_to_int(*in);
|
||||
cl_float x = round_to_int(is_in_half() ? HTF(*in) : *in);
|
||||
|
||||
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
||||
// VS2005 (at least) on x86 uses fistp to store the float as a
|
||||
@@ -534,47 +627,57 @@ void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
|
||||
}
|
||||
#else
|
||||
*out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
|
||||
? 0xFFFFFFFFFFFFFFFFULL
|
||||
: x < 0 ? 0 : (OutType)x;
|
||||
? (OutType)0xFFFFFFFFFFFFFFFFULL
|
||||
: x < 0 ? 0
|
||||
: (OutType)x;
|
||||
#endif
|
||||
}
|
||||
else if ((std::is_same<InType, cl_float>::value)
|
||||
else if ((std::is_same<InType, cl_float>::value || is_in_half())
|
||||
&& std::is_same<cl_long, OutType>::value)
|
||||
{
|
||||
cl_float f = round_to_int(*in);
|
||||
cl_float f = round_to_int(is_in_half() ? HTF(*in) : *in);
|
||||
*out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
|
||||
? 0x7FFFFFFFFFFFFFFFULL
|
||||
? (OutType)0x7FFFFFFFFFFFFFFFULL
|
||||
: f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
|
||||
? 0x8000000000000000LL
|
||||
: (OutType)f;
|
||||
? (OutType)0x8000000000000000LL
|
||||
: (OutType)f;
|
||||
}
|
||||
else if (std::is_same<InType, cl_double>::value
|
||||
&& std::is_same<cl_ulong, OutType>::value)
|
||||
{
|
||||
InType f = rint(*in);
|
||||
*out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
|
||||
? 0xFFFFFFFFFFFFFFFFULL
|
||||
: f < 0 ? 0 : (OutType)f;
|
||||
? (OutType)0xFFFFFFFFFFFFFFFFULL
|
||||
: f < 0 ? 0
|
||||
: (OutType)f;
|
||||
}
|
||||
else if (std::is_same<InType, cl_double>::value
|
||||
&& std::is_same<cl_long, OutType>::value)
|
||||
{
|
||||
InType f = rint(*in);
|
||||
*out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
|
||||
? 0x7FFFFFFFFFFFFFFFULL
|
||||
? (OutType)0x7FFFFFFFFFFFFFFFULL
|
||||
: f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
|
||||
? 0x8000000000000000LL
|
||||
: (OutType)f;
|
||||
? (OutType)0x8000000000000000LL
|
||||
: (OutType)f;
|
||||
}
|
||||
else
|
||||
{ // in float/double, out char/uchar/short/ushort/int/uint
|
||||
*out =
|
||||
CLAMP(ranges.first, round_to_int_and_clamp(*in), ranges.second);
|
||||
{ // in half/float/double, out char/uchar/short/ushort/int/uint
|
||||
*out = CLAMP(ranges.first,
|
||||
round_to_int_and_clamp(is_in_half() ? inVal : *in),
|
||||
ranges.second);
|
||||
}
|
||||
}
|
||||
else if (std::is_integral<InType>::value
|
||||
&& std::is_integral<OutType>::value)
|
||||
{
|
||||
if (is_out_half())
|
||||
{
|
||||
*out = std::is_signed<InType>::value
|
||||
? static_cast<OutType>(HFF((cl_float)*in))
|
||||
: absolute(static_cast<OutType>(HFF((cl_float)*in)));
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((std::is_signed<InType>::value
|
||||
&& std::is_signed<OutType>::value)
|
||||
@@ -612,14 +715,40 @@ void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
void DataInfoSpec<InType, OutType>::init(const cl_uint &job_id,
|
||||
const cl_uint &thread_id)
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
void DataInfoSpec<InType, OutType, InFP, OutFP>::init(const cl_uint &job_id,
|
||||
const cl_uint &thread_id)
|
||||
{
|
||||
uint64_t ulStart = start;
|
||||
void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType];
|
||||
|
||||
if (std::is_integral<InType>::value)
|
||||
if (is_in_half())
|
||||
{
|
||||
cl_half *o = (cl_half *)pIn;
|
||||
int i;
|
||||
|
||||
if (gIsEmbedded)
|
||||
for (i = 0; i < size; i++)
|
||||
o[i] = (cl_half)genrand_int32(mdv[thread_id]);
|
||||
else
|
||||
for (i = 0; i < size; i++) o[i] = (cl_half)((i + ulStart) % 0xffff);
|
||||
|
||||
if (0 == ulStart)
|
||||
{
|
||||
size_t tableSize = specialValuesHalf.size()
|
||||
* sizeof(decltype(specialValuesHalf)::value_type);
|
||||
if (sizeof(InType) * size < tableSize)
|
||||
tableSize = sizeof(InType) * size;
|
||||
memcpy((char *)(o + i) - tableSize, &specialValuesHalf.front(),
|
||||
tableSize);
|
||||
}
|
||||
|
||||
if (kUnsaturated == sat)
|
||||
{
|
||||
for (i = 0; i < size; i++) o[i] = clamp(o[i]);
|
||||
}
|
||||
}
|
||||
else if (std::is_integral<InType>::value)
|
||||
{
|
||||
InType *o = (InType *)pIn;
|
||||
if (sizeof(InType) <= sizeof(cl_short))
|
||||
@@ -774,10 +903,10 @@ void DataInfoSpec<InType, OutType>::init(const cl_uint &job_id,
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InType, typename OutType>
|
||||
InType DataInfoSpec<InType, OutType>::clamp(const InType &in)
|
||||
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
||||
InType DataInfoSpec<InType, OutType, InFP, OutFP>::clamp(const InType &in)
|
||||
{
|
||||
if (std::is_integral<OutType>::value)
|
||||
if (std::is_integral<OutType>::value && !OutFP)
|
||||
{
|
||||
if (std::is_same<InType, cl_float>::value)
|
||||
{
|
||||
@@ -789,6 +918,11 @@ InType DataInfoSpec<InType, OutType>::clamp(const InType &in)
|
||||
return dclamp(clamp_ranges[round].first, in,
|
||||
clamp_ranges[round].second);
|
||||
}
|
||||
else if (std::is_same<InType, cl_half>::value && InFP)
|
||||
{
|
||||
return HFF(fclamp(HTF(clamp_ranges[round].first), HTF(in),
|
||||
HTF(clamp_ranges[round].second)));
|
||||
}
|
||||
}
|
||||
return in;
|
||||
}
|
||||
|
||||
@@ -73,9 +73,9 @@ static void PrintUsage(void);
|
||||
test_status InitCL(cl_device_id device);
|
||||
|
||||
|
||||
const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short",
|
||||
"uint", "int", "float", "double",
|
||||
"ulong", "long" };
|
||||
// Printable name of each scalar type, one entry per type enumerator
// (kTypeCount entries). The order must follow the enumerator values:
// "half" sits at index 6 (khalf), between "int" (kint = 5) and
// "float" (kfloat = 7), followed by double, ulong and long.
const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short",
|
||||
"uint", "int", "half", "float",
|
||||
"double", "ulong", "long" };
|
||||
|
||||
const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp",
|
||||
"_rtn", "_rtz" };
|
||||
@@ -83,9 +83,9 @@ const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp",
|
||||
const char *gSaturationNames[2] = { "", "_sat" };
|
||||
|
||||
size_t gTypeSizes[kTypeCount] = {
|
||||
sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short),
|
||||
sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), sizeof(cl_double),
|
||||
sizeof(cl_ulong), sizeof(cl_long),
|
||||
sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short),
|
||||
sizeof(cl_uint), sizeof(cl_int), sizeof(cl_half), sizeof(cl_float),
|
||||
sizeof(cl_double), sizeof(cl_ulong), sizeof(cl_long),
|
||||
};
|
||||
|
||||
char appName[64] = "ctest";
|
||||
@@ -221,13 +221,17 @@ static int ParseArgs(int argc, const char **argv)
|
||||
switch (*arg)
|
||||
{
|
||||
case 'd': gTestDouble ^= 1; break;
|
||||
case 'h': gTestHalfs ^= 1; break;
|
||||
case 'l': gSkipTesting ^= 1; break;
|
||||
case 'm': gMultithread ^= 1; break;
|
||||
case 'w': gWimpyMode ^= 1; break;
|
||||
case '[':
|
||||
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
|
||||
break;
|
||||
case 'z': gForceFTZ ^= 1; break;
|
||||
case 'z':
|
||||
gForceFTZ ^= 1;
|
||||
gForceHalfFTZ ^= 1;
|
||||
break;
|
||||
case 't': gTimeResults ^= 1; break;
|
||||
case 'a': gReportAverageTimes ^= 1; break;
|
||||
case '1':
|
||||
@@ -355,7 +359,6 @@ static void PrintUsage(void)
|
||||
}
|
||||
|
||||
|
||||
|
||||
test_status InitCL(cl_device_id device)
|
||||
{
|
||||
int error, i;
|
||||
@@ -412,6 +415,50 @@ test_status InitCL(cl_device_id device)
|
||||
}
|
||||
gTestDouble &= gHasDouble;
|
||||
|
||||
if (is_extension_available(device, "cl_khr_fp16"))
|
||||
{
|
||||
gHasHalfs = 1;
|
||||
|
||||
cl_device_fp_config floatCapabilities = 0;
|
||||
if ((error = clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG,
|
||||
sizeof(floatCapabilities),
|
||||
&floatCapabilities, NULL)))
|
||||
floatCapabilities = 0;
|
||||
|
||||
if (0 == (CL_FP_DENORM & floatCapabilities)) gForceHalfFTZ ^= 1;
|
||||
|
||||
if (0 == (floatCapabilities & CL_FP_ROUND_TO_NEAREST))
|
||||
{
|
||||
char profileStr[128] = "";
|
||||
// Verify that we are an embedded profile device
|
||||
if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE,
|
||||
sizeof(profileStr), profileStr, NULL)))
|
||||
{
|
||||
vlog_error("FAILURE: Could not get device profile: error %d\n",
|
||||
error);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
if (strcmp(profileStr, "EMBEDDED_PROFILE"))
|
||||
{
|
||||
vlog_error(
|
||||
"FAILURE: non-embedded profile device does not support "
|
||||
"CL_FP_ROUND_TO_NEAREST\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
if (0 == (floatCapabilities & CL_FP_ROUND_TO_ZERO))
|
||||
{
|
||||
vlog_error("FAILURE: embedded profile device supports neither "
|
||||
"CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
gIsHalfRTZ = 1;
|
||||
}
|
||||
}
|
||||
gTestHalfs &= gHasHalfs;
|
||||
|
||||
// detect whether profile of the device is embedded
|
||||
char profile[1024] = "";
|
||||
if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile),
|
||||
@@ -492,8 +539,12 @@ test_status InitCL(cl_device_id device)
|
||||
vlog("\tSubnormal values supported for floats? %s\n",
|
||||
no_yes[0 != (CL_FP_DENORM & floatCapabilities)]);
|
||||
vlog("\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ]);
|
||||
vlog("\tTesting with FTZ mode ON for halfs? %s\n",
|
||||
no_yes[0 != gForceHalfFTZ]);
|
||||
vlog("\tTesting with default RTZ mode for floats? %s\n",
|
||||
no_yes[0 != gIsRTZ]);
|
||||
vlog("\tTesting with default RTZ mode for halfs? %s\n",
|
||||
no_yes[0 != gIsHalfRTZ]);
|
||||
vlog("\tHas Double? %s\n", no_yes[0 != gHasDouble]);
|
||||
if (gHasDouble) vlog("\tTest Double? %s\n", no_yes[0 != gTestDouble]);
|
||||
vlog("\tHas Long? %s\n", no_yes[0 != gHasLong]);
|
||||
@@ -503,5 +554,3 @@ test_status InitCL(cl_device_id device)
|
||||
vlog("\n");
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user