// Mirror of https://github.com/KhronosGroup/OpenCL-CTS.git
// (synced 2026-03-19 06:09:01 +00:00; 969 lines, 36 KiB, C++)
//
// Copyright (c) 2023 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef CONVERSIONS_DATA_INFO_H
#define CONVERSIONS_DATA_INFO_H

#if defined(__APPLE__)
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif

#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
// Software conversion routines used as the 64-bit-integer -> float reference
// on ARM (see DataInfoSpec::conv).
#include "fplib.h"
extern bool qcom_sat;
extern roundingMode qcom_rm;
#endif

#include <CL/cl_half.h>

#include "harness/conversions.h"
#include "harness/mt19937.h"
#include "harness/rounding_mode.h"
#include "harness/typeWrappers.h"

// Standard headers for names used directly in this file (FLT_EPSILON,
// LLONG_MIN, fabs/rint, uint8_t, memcpy, std::is_same, std::pair).
#include <cfloat>
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <utility>
#include <vector>

#if defined(__linux__)
#include <sys/param.h>
#include <libgen.h>
#endif

// Per-type element sizes in bytes, indexed by Type.
extern size_t gTypeSizes[kTypeCount];
// Base address of the shared input buffer filled by DataInfoSpec::init.
extern void *gIn;
|
|
|
|
// Whether a conversion is the plain or the saturating (_sat) variant.
// kSaturationModeCount is the number of modes and must stay last.
enum SaturationMode
{
    kUnsaturated = 0,
    kSaturated,

    kSaturationModeCount
};
|
|
struct DataInitInfo
|
|
{
|
|
cl_ulong start;
|
|
cl_uint size;
|
|
Type outType;
|
|
Type inType;
|
|
SaturationMode sat;
|
|
RoundingMode round;
|
|
cl_uint threads;
|
|
|
|
static cl_half_rounding_mode halfRoundingMode;
|
|
static std::vector<uint32_t> specialValuesUInt;
|
|
static std::vector<float> specialValuesFloat;
|
|
static std::vector<double> specialValuesDouble;
|
|
static std::vector<cl_half> specialValuesHalf;
|
|
};
|
|
|
|
// Half-precision conversion shorthands.
// HFF: float  -> cl_half, rounded with DataInitInfo::halfRoundingMode.
// HTF: cl_half -> float.
// HFD: double -> cl_half, rounded with DataInitInfo::halfRoundingMode.
#define HFF(num) cl_half_from_float((num), DataInitInfo::halfRoundingMode)
#define HTF(num) cl_half_to_float(num)
#define HFD(num) cl_half_from_double((num), DataInitInfo::halfRoundingMode)
|
|
struct DataInitBase : public DataInitInfo
|
|
{
|
|
virtual ~DataInitBase() = default;
|
|
|
|
explicit DataInitBase(const DataInitInfo &agg): DataInitInfo(agg) {}
|
|
virtual void conv_array(void *out, void *in, size_t n) {}
|
|
virtual void conv_array_sat(void *out, void *in, size_t n) {}
|
|
virtual void init(const cl_uint &, const cl_uint &) {}
|
|
virtual void set_allow_zero_array(uint8_t *allow, void *out, void *in, size_t n) {}
|
|
};
|
|
|
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
|
struct DataInfoSpec : public DataInitBase
|
|
{
|
|
explicit DataInfoSpec(const DataInitInfo &agg);
|
|
|
|
// helpers
|
|
float round_to_int(float f);
|
|
long long round_to_int_and_clamp(double d);
|
|
|
|
OutType absolute(const OutType &x);
|
|
|
|
// actual conversion of reference values
|
|
void conv(OutType *out, InType *in);
|
|
void conv_sat(OutType *out, InType *in);
|
|
|
|
// Decide if we allow a zero result in addition to the correctly rounded one
|
|
void set_allow_zero(uint8_t *allow, OutType *out, InType *in);
|
|
|
|
// min/max ranges for output type of data
|
|
std::pair<OutType, OutType> ranges;
|
|
|
|
// matrix of clamping ranges for each rounding type
|
|
std::vector<std::pair<InType, InType>> clamp_ranges;
|
|
|
|
std::vector<MTdataHolder> mdv;
|
|
|
|
constexpr bool is_in_half() const
|
|
{
|
|
return (std::is_same<InType, cl_half>::value && InFP);
|
|
}
|
|
|
|
constexpr bool is_out_half() const
|
|
{
|
|
return (std::is_same<OutType, cl_half>::value && OutFP);
|
|
}
|
|
|
|
void conv_array(void *out, void *in, size_t n) override
|
|
{
|
|
for (size_t i = 0; i < n; i++)
|
|
conv(&((OutType *)out)[i], &((InType *)in)[i]);
|
|
}
|
|
|
|
void conv_array_sat(void *out, void *in, size_t n) override
|
|
{
|
|
for (size_t i = 0; i < n; i++)
|
|
conv_sat(&((OutType *)out)[i], &((InType *)in)[i]);
|
|
}
|
|
|
|
void init(const cl_uint &, const cl_uint &) override;
|
|
void set_allow_zero_array(uint8_t *allow, void *out, void *in, size_t n) override {
|
|
for (size_t i = 0; i < n; i++)
|
|
set_allow_zero(&allow[i], &((OutType *)out)[i], &((InType *)in)[i]);
|
|
}
|
|
InType clamp(const InType &);
|
|
inline float fclamp(float lo, float v, float hi)
|
|
{
|
|
v = v < lo ? lo : v;
|
|
return v < hi ? v : hi;
|
|
}
|
|
|
|
inline double dclamp(double lo, double v, double hi)
|
|
{
|
|
v = v < lo ? lo : v;
|
|
return v < hi ? v : hi;
|
|
}
|
|
};
|
|
|
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
|
DataInfoSpec<InType, OutType, InFP, OutFP>::DataInfoSpec(
|
|
const DataInitInfo &agg)
|
|
: DataInitBase(agg), mdv(0)
|
|
{
|
|
if (std::is_same<cl_float, OutType>::value)
|
|
ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX);
|
|
else if (std::is_same<cl_double, OutType>::value)
|
|
ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX);
|
|
else if (std::is_same<cl_half, OutType>::value && OutFP)
|
|
ranges = std::make_pair(HFF(CL_HALF_MIN), HFF(CL_HALF_MAX));
|
|
else if (std::is_same<cl_uchar, OutType>::value)
|
|
ranges = std::make_pair(0, CL_UCHAR_MAX);
|
|
else if (std::is_same<cl_char, OutType>::value)
|
|
ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX);
|
|
else if (std::is_same<cl_ushort, OutType>::value && !OutFP)
|
|
ranges = std::make_pair(0, CL_USHRT_MAX);
|
|
else if (std::is_same<cl_short, OutType>::value)
|
|
ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX);
|
|
else if (std::is_same<cl_uint, OutType>::value)
|
|
ranges = std::make_pair(0, CL_UINT_MAX);
|
|
else if (std::is_same<cl_int, OutType>::value)
|
|
ranges = std::make_pair(CL_INT_MIN, CL_INT_MAX);
|
|
else if (std::is_same<cl_ulong, OutType>::value)
|
|
ranges = std::make_pair(0, CL_ULONG_MAX);
|
|
else if (std::is_same<cl_long, OutType>::value)
|
|
ranges = std::make_pair(CL_LONG_MIN, CL_LONG_MAX);
|
|
|
|
// clang-format off
|
|
// for readability sake keep this section unformatted
|
|
if (std::is_floating_point<InType>::value)
|
|
{ // from float/double
|
|
InType outMin = static_cast<InType>(ranges.first);
|
|
InType outMax = static_cast<InType>(ranges.second);
|
|
|
|
InType eps = std::is_same<InType, cl_float>::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON;
|
|
if (std::is_integral<OutType>::value && !OutFP)
|
|
{ // to char/uchar/short/ushort/int/uint/long/ulong
|
|
if (sizeof(OutType)<=sizeof(cl_short))
|
|
{ // to char/uchar/short/ushort
|
|
clamp_ranges=
|
|
{{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
|
|
{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
|
|
{outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax-1.f},
|
|
{outMin-0.0f, outMax - outMax * 0.5f * eps },
|
|
{outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax - outMax * 0.5f * eps}};
|
|
}
|
|
else if (std::is_same<InType, cl_float>::value)
|
|
{ // from float
|
|
if (std::is_same<OutType, cl_uint>::value)
|
|
{ // to uint
|
|
clamp_ranges=
|
|
{ {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
|
|
{outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
|
|
{outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
|
|
{outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) },
|
|
{outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}};
|
|
}
|
|
else if (std::is_same<OutType, cl_int>::value)
|
|
{ // to int
|
|
clamp_ranges=
|
|
{ {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
|
|
{outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
|
|
{outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
|
|
{outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) },
|
|
{outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}};
|
|
}
|
|
else if (std::is_same<OutType, cl_ulong>::value)
|
|
{ // to ulong
|
|
clamp_ranges=
|
|
{{outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
|
|
{outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
|
|
{outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
|
|
{outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) },
|
|
{outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}};
|
|
}
|
|
else if (std::is_same<OutType, cl_long>::value)
|
|
{ // to long
|
|
clamp_ranges=
|
|
{ {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
|
|
{MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
|
|
{MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
|
|
{MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
|
|
{MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}};
|
|
}
|
|
}
|
|
else
|
|
{ // from double
|
|
if (std::is_same<OutType, cl_uint>::value)
|
|
{ // to uint
|
|
clamp_ranges=
|
|
{ {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps},
|
|
{outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps},
|
|
{outMin-1.0f+0.5f*eps, outMax},
|
|
{outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) },
|
|
{outMin-1.0f+0.5f*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)}};
|
|
}
|
|
else if (std::is_same<OutType, cl_int>::value)
|
|
{ // to int
|
|
clamp_ranges=
|
|
{ {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps},
|
|
{outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps},
|
|
{outMin-1.0f+outMax*eps, outMax},
|
|
{outMin-0.0f, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps },
|
|
{outMin-1.0f+outMax*eps, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}};
|
|
}
|
|
else if (std::is_same<OutType, cl_ulong>::value)
|
|
{ // to ulong
|
|
clamp_ranges=
|
|
{{outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
|
|
{outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
|
|
{outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
|
|
{outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) },
|
|
{outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}};
|
|
}
|
|
else if (std::is_same<OutType, cl_long>::value)
|
|
{ // to long
|
|
clamp_ranges=
|
|
{ {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
|
|
{MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
|
|
{MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
|
|
{MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
|
|
{MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}};
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (is_in_half())
|
|
{
|
|
float outMin = static_cast<float>(ranges.first);
|
|
float outMax = static_cast<float>(ranges.second);
|
|
float eps = CL_HALF_EPSILON;
|
|
cl_half_rounding_mode prev_half_round = DataInitInfo::halfRoundingMode;
|
|
DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
|
|
|
|
if (std::is_integral<OutType>::value)
|
|
{ // to char/uchar/short/ushort/int/uint/long/ulong
|
|
if (sizeof(OutType)<=sizeof(cl_char) || std::is_same<OutType, cl_short>::value)
|
|
{ // to char/uchar
|
|
clamp_ranges=
|
|
{{HFF(outMin-0.5f), HFF(outMax + 0.5f - outMax * 0.5f * eps)},
|
|
{HFF(outMin-0.5f), HFF(outMax + 0.5f - outMax * 0.5f * eps)},
|
|
{HFF(outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps), HFF(outMax-1.f)},
|
|
{HFF(outMin-0.0f), HFF(outMax - outMax * 0.5f * eps) },
|
|
{HFF(outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps), HFF(outMax - outMax * 0.5f * eps)}};
|
|
}
|
|
else
|
|
{ // to ushort/int/uint/long/ulong
|
|
if (std::is_signed<OutType>::value)
|
|
{
|
|
clamp_ranges=
|
|
{ {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
|
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
|
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
|
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
|
|
{HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)}};
|
|
}
|
|
else
|
|
{
|
|
clamp_ranges=
|
|
{ {HFF(outMin), HFF(CL_HALF_MAX)},
|
|
{HFF(outMin), HFF(CL_HALF_MAX)},
|
|
{HFF(outMin), HFF(CL_HALF_MAX)},
|
|
{HFF(outMin), HFF(CL_HALF_MAX)},
|
|
{HFF(outMin), HFF(CL_HALF_MAX)}};
|
|
}
|
|
}
|
|
}
|
|
|
|
DataInitInfo::halfRoundingMode = prev_half_round;
|
|
}
|
|
// clang-format on
|
|
}
|
|
|
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
|
float DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int(float f)
|
|
{
|
|
static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23),
|
|
-MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) };
|
|
|
|
// Round fractional values to integer in round towards nearest mode
|
|
if (fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23))
|
|
{
|
|
volatile float x = f;
|
|
float magicVal = magic[f < 0];
|
|
|
|
#if defined(__SSE__)
|
|
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
|
// incorrectly
|
|
__m128 v = _mm_set_ss(x);
|
|
__m128 m = _mm_set_ss(magicVal);
|
|
v = _mm_add_ss(v, m);
|
|
v = _mm_sub_ss(v, m);
|
|
_mm_store_ss((float *)&x, v);
|
|
#else
|
|
x += magicVal;
|
|
x -= magicVal;
|
|
#endif
|
|
f = x;
|
|
}
|
|
return f;
|
|
}
|
|
|
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
|
long long
|
|
DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int_and_clamp(double f)
|
|
{
|
|
static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52),
|
|
MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
|
|
|
|
if (f >= -(double)LLONG_MIN) return LLONG_MAX;
|
|
|
|
if (f <= (double)LLONG_MIN) return LLONG_MIN;
|
|
|
|
// Round fractional values to integer in round towards nearest mode
|
|
if (fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52))
|
|
{
|
|
volatile double x = f;
|
|
double magicVal = magic[f < 0];
|
|
#if defined(__SSE2__) || defined(_MSC_VER)
|
|
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
|
// incorrectly
|
|
__m128d v = _mm_set_sd(x);
|
|
__m128d m = _mm_set_sd(magicVal);
|
|
v = _mm_add_sd(v, m);
|
|
v = _mm_sub_sd(v, m);
|
|
_mm_store_sd((double *)&x, v);
|
|
#else
|
|
x += magicVal;
|
|
x -= magicVal;
|
|
#endif
|
|
f = x;
|
|
}
|
|
return (long long)f;
|
|
}
|
|
|
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
|
OutType DataInfoSpec<InType, OutType, InFP, OutFP>::absolute(const OutType &x)
|
|
{
|
|
union {
|
|
cl_uint u;
|
|
OutType f;
|
|
} u;
|
|
u.f = x;
|
|
if (std::is_same<OutType, float>::value)
|
|
u.u &= 0x7fffffff;
|
|
else if (std::is_same<OutType, double>::value)
|
|
u.u &= 0x7fffffffffffffffULL;
|
|
else
|
|
log_error("Unexpected argument type of DataInfoSpec::absolute");
|
|
|
|
return u.f;
|
|
}
|
|
|
|
template <typename T, bool fp> constexpr bool is_half()
|
|
{
|
|
return (std::is_same<cl_half, T>::value && fp);
|
|
}
|
|
|
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
|
void DataInfoSpec<InType, OutType, InFP, OutFP>::conv(OutType *out, InType *in)
|
|
{
|
|
if (std::is_same<cl_float, InType>::value || is_in_half())
|
|
{
|
|
cl_float inVal = *in;
|
|
if (std::is_same<cl_half, InType>::value)
|
|
{
|
|
inVal = HTF(*in);
|
|
}
|
|
|
|
if (std::is_floating_point<OutType>::value)
|
|
{
|
|
*out = (OutType)inVal;
|
|
}
|
|
else if (is_out_half())
|
|
{
|
|
*out = HFF(*in);
|
|
}
|
|
else if (std::is_same<cl_ulong, OutType>::value)
|
|
{
|
|
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
|
// VS2005 (at least) on x86 uses fistp to store the float as a
|
|
// 64-bit int. However, fistp stores it as a signed int, and some of
|
|
// the test values won't fit into a signed int. (These test values
|
|
// are >= 2^63.) The result on VS2005 is that these end up silently
|
|
// (at least by default settings) clamped to the max lowest ulong.
|
|
cl_float x = round_to_int(inVal);
|
|
if (x >= 9223372036854775808.0f)
|
|
{
|
|
x -= 9223372036854775808.0f;
|
|
((cl_ulong *)out)[0] = x;
|
|
((cl_ulong *)out)[0] += 9223372036854775808ULL;
|
|
}
|
|
else
|
|
{
|
|
((cl_ulong *)out)[0] = x;
|
|
}
|
|
#else
|
|
*out = round_to_int(inVal);
|
|
#endif
|
|
}
|
|
else if (std::is_same<cl_long, OutType>::value)
|
|
{
|
|
*out = round_to_int_and_clamp(inVal);
|
|
}
|
|
else
|
|
*out = round_to_int(inVal);
|
|
}
|
|
else if (std::is_same<cl_double, InType>::value)
|
|
{
|
|
if (std::is_same<cl_float, OutType>::value)
|
|
*out = (OutType)*in;
|
|
else if (is_out_half())
|
|
*out = static_cast<OutType>(HFD(*in));
|
|
else
|
|
*out = rint(*in);
|
|
}
|
|
else if (std::is_same<cl_ulong, InType>::value
|
|
|| std::is_same<cl_long, InType>::value)
|
|
{
|
|
if (std::is_same<cl_double, OutType>::value)
|
|
{
|
|
#if defined(_MSC_VER)
|
|
double result;
|
|
|
|
if (std::is_same<cl_ulong, InType>::value)
|
|
{
|
|
cl_ulong l = ((cl_ulong *)in)[0];
|
|
cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1))
|
|
: (cl_long)l;
|
|
#if defined(_M_X64)
|
|
_mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl));
|
|
#else
|
|
result = sl;
|
|
#endif
|
|
((double *)out)[0] =
|
|
(l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result));
|
|
}
|
|
else
|
|
{
|
|
cl_long l = ((cl_long *)in)[0];
|
|
#if defined(_M_X64)
|
|
_mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l));
|
|
#else
|
|
result = l;
|
|
#endif
|
|
((double *)out)[0] =
|
|
(l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's
|
|
// always convert to +0.0
|
|
}
|
|
#else
|
|
// Use volatile to prevent optimization by Clang compiler
|
|
volatile InType vi = *in;
|
|
*out = (vi == 0 ? 0.0 : static_cast<OutType>(vi));
|
|
#endif
|
|
}
|
|
else if (std::is_same<cl_float, OutType>::value || is_out_half())
|
|
{
|
|
cl_float outVal = 0.f;
|
|
|
|
#if defined(_MSC_VER) && defined(_M_X64)
|
|
float result;
|
|
if (std::is_same<cl_ulong, InType>::value)
|
|
{
|
|
cl_ulong l = ((cl_ulong *)in)[0];
|
|
cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1))
|
|
: (cl_long)l;
|
|
_mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
|
|
outVal = (l == 0 ? 0.0f
|
|
: (((cl_long)l < 0) ? result * 2.0f : result));
|
|
}
|
|
else
|
|
{
|
|
cl_long l = ((cl_long *)in)[0];
|
|
_mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l));
|
|
outVal = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1,
|
|
// 0's always convert to +0.0
|
|
}
|
|
#else
|
|
InType l = ((InType *)in)[0];
|
|
#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
|
|
/* ARM VFP doesn't have hardware instruction for converting from
|
|
* 64-bit integer to float types, hence GCC ARM uses the
|
|
* floating-point emulation code despite which -mfloat-abi setting
|
|
* it is. But the emulation code in libgcc.a has only one rounding
|
|
* mode (round to nearest even in this case) and ignores the user
|
|
* rounding mode setting in hardware. As a result setting rounding
|
|
* modes in hardware won't give correct rounding results for type
|
|
* covert from 64-bit integer to float using GCC for ARM compiler so
|
|
* for testing different rounding modes, we need to use alternative
|
|
* reference function. ARM64 does have an instruction, however we
|
|
* cannot guarantee the compiler will use it. On all ARM
|
|
* architechures use emulation to calculate reference.*/
|
|
if (std::is_same<cl_ulong, InType>::value)
|
|
outVal = qcom_u64_2_f32(l, qcom_sat, qcom_rm);
|
|
else
|
|
outVal = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm));
|
|
#else
|
|
outVal = (l == 0 ? 0.0f : (float)l); // Per IEEE-754-2008 5.4.1, 0's
|
|
// always convert to +0.0
|
|
#endif
|
|
#endif
|
|
|
|
*out = std::is_same<cl_half, OutType>::value
|
|
? static_cast<OutType>(HFF(outVal))
|
|
: outVal;
|
|
}
|
|
else
|
|
{
|
|
*out = (OutType)*in;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (std::is_same<cl_float, OutType>::value)
|
|
{
|
|
// Use volatile to prevent optimization by Clang compiler
|
|
volatile InType vi = *in;
|
|
// Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
|
|
*out = (vi == 0 ? 0.0f : vi);
|
|
}
|
|
else if (std::is_same<cl_double, OutType>::value)
|
|
{
|
|
// Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
|
|
*out = (*in == 0 ? 0.0 : *in);
|
|
}
|
|
else if (is_out_half())
|
|
*out = static_cast<OutType>(HFF(*in == 0 ? 0.f : *in));
|
|
else
|
|
{
|
|
*out = (OutType)*in;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Clamps _x to [_lo, _hi]. Function-like macro: arguments may be evaluated
// more than once, so pass only side-effect-free expressions.
#define CLAMP(_lo, _x, _hi)                                                    \
    ((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x)))
|
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
|
void DataInfoSpec<InType, OutType, InFP, OutFP>::conv_sat(OutType *out,
|
|
InType *in)
|
|
{
|
|
if (std::is_floating_point<InType>::value || is_in_half())
|
|
{
|
|
cl_float inVal = *in;
|
|
if (is_in_half()) inVal = HTF(*in);
|
|
|
|
if (std::is_floating_point<OutType>::value || is_out_half())
|
|
{ // in half/float/double, out half/float/double
|
|
if (is_out_half())
|
|
*out = static_cast<OutType>(HFF(inVal));
|
|
else
|
|
*out = (OutType)(is_in_half() ? inVal : *in);
|
|
}
|
|
else if ((std::is_same<InType, cl_float>::value || is_in_half())
|
|
&& std::is_same<cl_ulong, OutType>::value)
|
|
{
|
|
cl_float x = round_to_int(is_in_half() ? HTF(*in) : *in);
|
|
|
|
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
|
// VS2005 (at least) on x86 uses fistp to store the float as a
|
|
// 64-bit int. However, fistp stores it as a signed int, and some of
|
|
// the test values won't fit into a signed int. (These test values
|
|
// are >= 2^63.) The result on VS2005 is that these end up silently
|
|
// (at least by default settings) clamped to the max lowest ulong.
|
|
if (x >= 18446744073709551616.0f)
|
|
{ // 2^64
|
|
*out = 0xFFFFFFFFFFFFFFFFULL;
|
|
}
|
|
else if (x < 0)
|
|
{
|
|
*out = 0;
|
|
}
|
|
else if (x >= 9223372036854775808.0f)
|
|
{ // 2^63
|
|
x -= 9223372036854775808.0f;
|
|
*out = x;
|
|
*out += 9223372036854775808ULL;
|
|
}
|
|
else
|
|
{
|
|
*out = x;
|
|
}
|
|
#else
|
|
*out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
|
|
? (OutType)0xFFFFFFFFFFFFFFFFULL
|
|
: x < 0 ? 0
|
|
: (OutType)x;
|
|
#endif
|
|
}
|
|
else if ((std::is_same<InType, cl_float>::value || is_in_half())
|
|
&& std::is_same<cl_long, OutType>::value)
|
|
{
|
|
cl_float f = round_to_int(is_in_half() ? HTF(*in) : *in);
|
|
*out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
|
|
? (OutType)0x7FFFFFFFFFFFFFFFULL
|
|
: f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
|
|
? (OutType)0x8000000000000000LL
|
|
: (OutType)f;
|
|
}
|
|
else if (std::is_same<InType, cl_double>::value
|
|
&& std::is_same<cl_ulong, OutType>::value)
|
|
{
|
|
InType f = rint(*in);
|
|
*out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
|
|
? (OutType)0xFFFFFFFFFFFFFFFFULL
|
|
: f < 0 ? 0
|
|
: (OutType)f;
|
|
}
|
|
else if (std::is_same<InType, cl_double>::value
|
|
&& std::is_same<cl_long, OutType>::value)
|
|
{
|
|
InType f = rint(*in);
|
|
*out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
|
|
? (OutType)0x7FFFFFFFFFFFFFFFULL
|
|
: f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
|
|
? (OutType)0x8000000000000000LL
|
|
: (OutType)f;
|
|
}
|
|
else
|
|
{ // in half/float/double, out char/uchar/short/ushort/int/uint
|
|
*out = CLAMP(ranges.first,
|
|
round_to_int_and_clamp(is_in_half() ? inVal : *in),
|
|
ranges.second);
|
|
}
|
|
}
|
|
else if (std::is_integral<InType>::value
|
|
&& std::is_integral<OutType>::value)
|
|
{
|
|
if (is_out_half())
|
|
{
|
|
*out = std::is_signed<InType>::value
|
|
? static_cast<OutType>(HFF((cl_float)*in))
|
|
: absolute(static_cast<OutType>(HFF((cl_float)*in)));
|
|
}
|
|
else
|
|
{
|
|
if ((std::is_signed<InType>::value
|
|
&& std::is_signed<OutType>::value)
|
|
|| (!std::is_signed<InType>::value
|
|
&& !std::is_signed<OutType>::value))
|
|
{
|
|
if (sizeof(InType) <= sizeof(OutType))
|
|
{
|
|
*out = (OutType)*in;
|
|
}
|
|
else
|
|
{
|
|
*out = CLAMP(ranges.first, *in, ranges.second);
|
|
}
|
|
}
|
|
else
|
|
{ // mixed signed/unsigned types
|
|
if (sizeof(InType) < sizeof(OutType))
|
|
{
|
|
*out = (!std::is_signed<InType>::value)
|
|
? (OutType)*in
|
|
: CLAMP(0, *in, ranges.second); // *in < 0 ? 0 : *in
|
|
}
|
|
else
|
|
{ // bigger/equal mixed signed/unsigned types - always clamp
|
|
*out = CLAMP(0, *in, ranges.second);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{ // InType integral, OutType floating
|
|
*out = std::is_signed<InType>::value ? (OutType)*in
|
|
: absolute((OutType)*in);
|
|
}
|
|
}
|
|
|
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
|
void DataInfoSpec<InType, OutType, InFP, OutFP>::set_allow_zero(uint8_t *allow,
|
|
OutType *out,
|
|
InType *in)
|
|
{
|
|
// from double
|
|
if (std::is_same<InType, cl_double>::value)
|
|
*allow |= IsDoubleSubnormal(*in);
|
|
// from float
|
|
if (std::is_same<InType, cl_float>::value)
|
|
*allow |= IsFloatSubnormal(*in);
|
|
// from half
|
|
if (is_in_half())
|
|
*allow |= IsHalfSubnormal(*in);
|
|
|
|
// handle the cases that the converted result is subnormal
|
|
// from double
|
|
if (std::is_same<OutType, cl_double>::value)
|
|
*allow |= IsDoubleSubnormal(*out);
|
|
// from float
|
|
if (std::is_same<OutType, cl_float>::value)
|
|
*allow |= IsFloatSubnormal(*out);
|
|
// from half
|
|
if (is_out_half())
|
|
*allow |= IsHalfSubnormal(*out);
|
|
}
|
|
|
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
|
void DataInfoSpec<InType, OutType, InFP, OutFP>::init(const cl_uint &job_id,
|
|
const cl_uint &thread_id)
|
|
{
|
|
uint64_t ulStart = start;
|
|
void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType];
|
|
|
|
if (is_in_half())
|
|
{
|
|
cl_half *o = (cl_half *)pIn;
|
|
int i;
|
|
|
|
if (gIsEmbedded)
|
|
for (i = 0; i < size; i++)
|
|
o[i] = (cl_half)genrand_int32(mdv[thread_id]);
|
|
else
|
|
for (i = 0; i < size; i++) o[i] = (cl_half)((i + ulStart) % 0xffff);
|
|
|
|
if (0 == ulStart)
|
|
{
|
|
size_t tableSize = specialValuesHalf.size()
|
|
* sizeof(decltype(specialValuesHalf)::value_type);
|
|
if (sizeof(InType) * size < tableSize)
|
|
tableSize = sizeof(InType) * size;
|
|
memcpy((char *)(o + i) - tableSize, &specialValuesHalf.front(),
|
|
tableSize);
|
|
}
|
|
|
|
if (kUnsaturated == sat)
|
|
{
|
|
for (i = 0; i < size; i++) o[i] = clamp(o[i]);
|
|
}
|
|
}
|
|
else if (std::is_integral<InType>::value)
|
|
{
|
|
InType *o = (InType *)pIn;
|
|
if (sizeof(InType) <= sizeof(cl_short))
|
|
{ // char/uchar/ushort/short
|
|
for (int i = 0; i < size; i++) o[i] = ulStart++;
|
|
}
|
|
else if (sizeof(InType) <= sizeof(cl_int))
|
|
{ // int/uint
|
|
int i = 0;
|
|
if (gIsEmbedded)
|
|
for (i = 0; i < size; i++)
|
|
o[i] = (InType)genrand_int32(mdv[thread_id]);
|
|
else
|
|
for (i = 0; i < size; i++) o[i] = (InType)i + ulStart;
|
|
|
|
if (0 == ulStart)
|
|
{
|
|
size_t tableSize = specialValuesUInt.size()
|
|
* sizeof(decltype(specialValuesUInt)::value_type);
|
|
if (sizeof(InType) * size < tableSize)
|
|
tableSize = sizeof(InType) * size;
|
|
memcpy((char *)(o + i) - tableSize, &specialValuesUInt.front(),
|
|
tableSize);
|
|
}
|
|
}
|
|
else
|
|
{ // long/ulong
|
|
cl_ulong *o = (cl_ulong *)pIn;
|
|
cl_ulong i, j, k;
|
|
|
|
i = 0;
|
|
if (ulStart == 0)
|
|
{
|
|
// Try various powers of two
|
|
for (j = 0; j < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
|
|
o[j] = (cl_ulong)1 << j;
|
|
i = j;
|
|
|
|
// try the complement of those
|
|
for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
|
|
o[i++] = ~((cl_ulong)1 << j);
|
|
|
|
// Try various negative powers of two
|
|
for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
|
|
o[i++] = (cl_ulong)0xFFFFFFFFFFFFFFFEULL << j;
|
|
|
|
// try various powers of two plus 1, shifted by various amounts
|
|
for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
|
|
for (k = 0;
|
|
i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j;
|
|
k++)
|
|
o[i++] = (((cl_ulong)1 << j) + 1) << k;
|
|
|
|
// try various powers of two minus 1
|
|
for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
|
|
for (k = 0;
|
|
i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j;
|
|
k++)
|
|
o[i++] = (((cl_ulong)1 << j) - 1) << k;
|
|
|
|
// Other patterns
|
|
cl_ulong pattern[] = {
|
|
0x3333333333333333ULL, 0x5555555555555555ULL,
|
|
0x9999999999999999ULL, 0x6666666666666666ULL,
|
|
0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL
|
|
};
|
|
cl_ulong mask[] = { 0xffffffffffffffffULL,
|
|
0xff00ff00ff00ff00ULL,
|
|
0xffff0000ffff0000ULL,
|
|
0xffffffff00000000ULL };
|
|
for (j = 0; i < (cl_ulong)size
|
|
&& j < sizeof(pattern) / sizeof(pattern[0]);
|
|
j++)
|
|
for (k = 0; i + 2 <= (cl_ulong)size
|
|
&& k < sizeof(mask) / sizeof(mask[0]);
|
|
k++)
|
|
{
|
|
o[i++] = pattern[j] & mask[k];
|
|
o[i++] = pattern[j] & ~mask[k];
|
|
}
|
|
}
|
|
|
|
auto &md = mdv[thread_id];
|
|
for (; i < (cl_ulong)size; i++)
|
|
o[i] = (cl_ulong)genrand_int32(md)
|
|
| ((cl_ulong)genrand_int32(md) << 32);
|
|
}
|
|
} // integrals
|
|
else if (std::is_same<InType, cl_float>::value)
|
|
{
|
|
cl_uint *o = (cl_uint *)pIn;
|
|
int i;
|
|
|
|
if (gIsEmbedded)
|
|
for (i = 0; i < size; i++)
|
|
o[i] = (cl_uint)genrand_int32(mdv[thread_id]);
|
|
else
|
|
for (i = 0; i < size; i++) o[i] = (cl_uint)i + ulStart;
|
|
|
|
if (0 == ulStart)
|
|
{
|
|
size_t tableSize = specialValuesFloat.size()
|
|
* sizeof(decltype(specialValuesFloat)::value_type);
|
|
if (sizeof(InType) * size < tableSize)
|
|
tableSize = sizeof(InType) * size;
|
|
memcpy((char *)(o + i) - tableSize, &specialValuesFloat.front(),
|
|
tableSize);
|
|
}
|
|
|
|
if (kUnsaturated == sat)
|
|
{
|
|
InType *f = (InType *)pIn;
|
|
for (i = 0; i < size; i++) f[i] = clamp(f[i]);
|
|
}
|
|
}
|
|
else if (std::is_same<InType, cl_double>::value)
|
|
{
|
|
InType *o = (InType *)pIn;
|
|
int i = 0;
|
|
|
|
union {
|
|
uint64_t u;
|
|
InType d;
|
|
} u;
|
|
|
|
for (i = 0; i < size; i++)
|
|
{
|
|
uint64_t z = i + ulStart;
|
|
|
|
uint32_t bits = ((uint32_t)z ^ (uint32_t)(z >> 32));
|
|
// split 0x89abcdef to 0x89abc00000000def
|
|
u.u = bits & 0xfffU;
|
|
u.u |= (uint64_t)(bits & ~0xfffU) << 32;
|
|
// sign extend the leading bit of def segment as sign bit so that
|
|
// the middle region consists of either all 1s or 0s
|
|
u.u -= (bits & 0x800U) << 1;
|
|
o[i] = u.d;
|
|
}
|
|
|
|
if (0 == ulStart)
|
|
{
|
|
size_t tableSize = specialValuesDouble.size()
|
|
* sizeof(decltype(specialValuesDouble)::value_type);
|
|
if (sizeof(InType) * size < tableSize)
|
|
tableSize = sizeof(InType) * size;
|
|
memcpy((char *)(o + i) - tableSize, &specialValuesDouble.front(),
|
|
tableSize);
|
|
}
|
|
|
|
if (0 == sat)
|
|
for (i = 0; i < size; i++) o[i] = clamp(o[i]);
|
|
}
|
|
}
|
|
|
|
template <typename InType, typename OutType, bool InFP, bool OutFP>
|
|
InType DataInfoSpec<InType, OutType, InFP, OutFP>::clamp(const InType &in)
|
|
{
|
|
if (std::is_integral<OutType>::value && !OutFP)
|
|
{
|
|
if (std::is_same<InType, cl_float>::value)
|
|
{
|
|
return fclamp(clamp_ranges[round].first, in,
|
|
clamp_ranges[round].second);
|
|
}
|
|
else if (std::is_same<InType, cl_double>::value)
|
|
{
|
|
return dclamp(clamp_ranges[round].first, in,
|
|
clamp_ranges[round].second);
|
|
}
|
|
else if (std::is_same<InType, cl_half>::value && InFP)
|
|
{
|
|
return HFF(fclamp(HTF(clamp_ranges[round].first), HTF(in),
|
|
HTF(clamp_ranges[round].second)));
|
|
}
|
|
}
|
|
return in;
|
|
}
|
|
|
|
#endif /* CONVERSIONS_DATA_INFO_H */