Files
OpenCL-CTS/test_conformance/conversions/basic_test_conversions.cpp
banan328 8974d74db7 conversions: fix ZeroNanToIntCases from https://github.com/KhronosGroup/OpenCL-CTS/pull/1975 (#2030)
The handling of NaN values in the templated function was incorrect due
to improper initialization of the input data source. Specifically, the
function ZeroNanToIntCases used a global pointer gIn, which was not
correctly set or did not point to the same data as the local input
pointer s used in the non-templated implementation.

to solve the issue I updated the templated function ZeroNanToIntCases to
take an additional parameter for the input data source.
and then passed the correct input data (s) to the templated function
during its invocation.

Co-authored-by: Banan Ashkar <banan.ashkar@mobileye.com>
2024-08-06 09:21:55 -07:00

1643 lines
65 KiB
C++

//
// Copyright (c) 2017-2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
#include "harness/compat.h"
#include "harness/ThreadPool.h"
#if defined(__APPLE__)
#include <sys/sysctl.h>
#include <mach/mach_time.h>
#endif
#if defined(__linux__)
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/sysctl.h>
#endif
#if defined(__linux__)
#include <sys/param.h>
#include <libgen.h>
#endif
#if defined(__MINGW32__)
#include <sys/param.h>
#endif
#include <sstream>
#include <stdarg.h>
#if !defined(_WIN32)
#include <libgen.h>
#include <sys/mman.h>
#endif
#include <time.h>
#include <algorithm>
#include <vector>
#include <type_traits>
#include <cmath>
#include "basic_test_conversions.h"
#if defined(_WIN32)
#include <mmintrin.h>
#include <emmintrin.h>
#else // !_WIN32
#if defined(__SSE__)
#include <xmmintrin.h>
#endif
#if defined(__SSE2__)
#include <emmintrin.h>
#endif
#endif // _WIN32
cl_context gContext = NULL;
cl_command_queue gQueue = NULL;
int gStartTestNumber = -1;
int gEndTestNumber = 0;
#if defined(__APPLE__)
int gTimeResults = 1;
#else
int gTimeResults = 0;
#endif
int gReportAverageTimes = 0;
void *gIn = NULL;
void *gRef = NULL;
void *gAllowZ = NULL;
void *gOut[kCallStyleCount] = { NULL };
cl_mem gInBuffer;
cl_mem gOutBuffers[kCallStyleCount];
size_t gComputeDevices = 0;
uint32_t gDeviceFrequency = 0;
int gWimpyMode = 0;
int gWimpyReductionFactor = 128;
int gSkipTesting = 0;
int gForceFTZ = 0;
int gIsRTZ = 0;
int gForceHalfFTZ = 0;
int gIsHalfRTZ = 0;
uint32_t gSimdSize = 1;
int gHasDouble = 0;
int gTestDouble = 1;
int gHasHalfs = 0;
int gTestHalfs = 1;
const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
int gMinVectorSize = 0;
int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]);
MTdata gMTdata;
const char **argList = NULL;
int argCount = 0;
double SubtractTime(uint64_t endTime, uint64_t startTime);
cl_half_rounding_mode DataInitInfo::halfRoundingMode = CL_HALF_RTE;
cl_half_rounding_mode ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE;
// clang-format off
// for readability sake keep this section unformatted
std::vector<unsigned int> DataInitInfo::specialValuesUInt = {
uint32_t(INT_MIN), uint32_t(INT_MIN + 1), uint32_t(INT_MIN + 2),
uint32_t(-(1 << 30) - 3), uint32_t(-(1 << 30) - 2), uint32_t(-(1 << 30) - 1), uint32_t(-(1 << 30)),
uint32_t(-(1 << 30) + 1), uint32_t(-(1 << 30) + 2), uint32_t(-(1 << 30) + 3),
uint32_t(-(1 << 24) - 3), uint32_t(-(1 << 24) - 2),uint32_t(-(1 << 24) - 1),
uint32_t(-(1 << 24)), uint32_t(-(1 << 24) + 1), uint32_t(-(1 << 24) + 2), uint32_t(-(1 << 24) + 3),
uint32_t(-(1 << 23) - 3), uint32_t(-(1 << 23) - 2),uint32_t(-(1 << 23) - 1),
uint32_t(-(1 << 23)), uint32_t(-(1 << 23) + 1), uint32_t(-(1 << 23) + 2), uint32_t(-(1 << 23) + 3),
uint32_t(-(1 << 22) - 3), uint32_t(-(1 << 22) - 2),uint32_t(-(1 << 22) - 1),
uint32_t(-(1 << 22)), uint32_t(-(1 << 22) + 1), uint32_t(-(1 << 22) + 2), uint32_t(-(1 << 22) + 3),
uint32_t(-(1 << 21) - 3), uint32_t(-(1 << 21) - 2),uint32_t(-(1 << 21) - 1),
uint32_t(-(1 << 21)), uint32_t(-(1 << 21) + 1), uint32_t(-(1 << 21) + 2), uint32_t(-(1 << 21) + 3),
uint32_t(-(1 << 16) - 3), uint32_t(-(1 << 16) - 2),uint32_t(-(1 << 16) - 1),
uint32_t(-(1 << 16)), uint32_t(-(1 << 16) + 1), uint32_t(-(1 << 16) + 2), uint32_t(-(1 << 16) + 3),
uint32_t(-(1 << 15) - 3), uint32_t(-(1 << 15) - 2),uint32_t(-(1 << 15) - 1),
uint32_t(-(1 << 15)), uint32_t(-(1 << 15) + 1), uint32_t(-(1 << 15) + 2), uint32_t(-(1 << 15) + 3),
uint32_t(-(1 << 8) - 3), uint32_t(-(1 << 8) - 2),uint32_t(-(1 << 8) - 1),
uint32_t(-(1 << 8)), uint32_t(-(1 << 8) + 1), uint32_t(-(1 << 8) + 2), uint32_t(-(1 << 8) + 3),
uint32_t(-(1 << 7) - 3), uint32_t(-(1 << 7) - 2),uint32_t(-(1 << 7) - 1),
uint32_t(-(1 << 7)), uint32_t(-(1 << 7) + 1), uint32_t(-(1 << 7) + 2), uint32_t(-(1 << 7) + 3),
uint32_t(-4), uint32_t(-3), uint32_t(-2), uint32_t(-1), 0, 1, 2, 3, 4,
(1 << 7) - 3,(1 << 7) - 2,(1 << 7) - 1, (1 << 7), (1 << 7) + 1, (1 << 7) + 2, (1 << 7) + 3,
(1 << 8) - 3,(1 << 8) - 2,(1 << 8) - 1, (1 << 8), (1 << 8) + 1, (1 << 8) + 2, (1 << 8) + 3,
(1 << 15) - 3,(1 << 15) - 2,(1 << 15) - 1, (1 << 15), (1 << 15) + 1, (1 << 15) + 2, (1 << 15) + 3,
(1 << 16) - 3,(1 << 16) - 2,(1 << 16) - 1, (1 << 16), (1 << 16) + 1, (1 << 16) + 2, (1 << 16) + 3,
(1 << 21) - 3,(1 << 21) - 2,(1 << 21) - 1, (1 << 21), (1 << 21) + 1, (1 << 21) + 2, (1 << 21) + 3,
(1 << 22) - 3,(1 << 22) - 2,(1 << 22) - 1, (1 << 22), (1 << 22) + 1, (1 << 22) + 2, (1 << 22) + 3,
(1 << 23) - 3,(1 << 23) - 2,(1 << 23) - 1, (1 << 23), (1 << 23) + 1, (1 << 23) + 2, (1 << 23) + 3,
(1 << 24) - 3,(1 << 24) - 2,(1 << 24) - 1, (1 << 24), (1 << 24) + 1, (1 << 24) + 2, (1 << 24) + 3,
(1 << 30) - 3,(1 << 30) - 2,(1 << 30) - 1, (1 << 30), (1 << 30) + 1, (1 << 30) + 2, (1 << 30) + 3,
INT_MAX - 3, INT_MAX - 2, INT_MAX - 1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above
UINT_MAX - 3, UINT_MAX - 2, UINT_MAX - 1, UINT_MAX
};
std::vector<float> DataInitInfo::specialValuesFloat = {
-NAN, -INFINITY, -FLT_MAX,
MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
-1000.f, -100.f, -4.0f, -3.5f, -3.0f,
MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f,
MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f,
MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f,
MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f,
MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f,
MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f,
MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN,
MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, +NAN, +INFINITY, +FLT_MAX,
MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
+1000.f, +100.f, +4.0f, +3.5f, +3.0f,
MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), +2.0f,
MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
};
// A table of more difficult cases to get right
std::vector<double> DataInitInfo::specialValuesDouble = {
-NAN, -INFINITY, -DBL_MAX,
MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8),
MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12),
MAKE_HEX_DOUBLE(-0x1.80000000000001p63, -0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59),
MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28),
MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27),
MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
-1000., -100., -4.0, -3.5, -3.0,
MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5,
MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0,
MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5,
MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0,
MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5,
MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25,
MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-DBL_MIN,
MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-0.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59),
MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28),
MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27),
MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
+1000., +100., +4.0, +3.5, +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5,
MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
+1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
+1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
+0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
+0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
+DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10),
MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10),
MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1),
MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1),
MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1),
MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0),
MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0),
MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0),
MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1),
MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1),
MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1),
MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0),
MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52),
MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0),
MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1),
MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53),
MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1),
MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2),
MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54),
MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2),
MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10),
MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10),
MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30),
MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
};
// A table of more difficult cases to get right
std::vector<cl_half> DataInitInfo::specialValuesHalf = {
0xffff,
0x0000,
0x0001,
0x7c00, /*INFINITY*/
0xfc00, /*-INFINITY*/
0x8000, /*-0*/
0x7bff, /*HALF_MAX*/
0x0400, /*HALF_MIN*/
0x03ff, /* Largest denormal */
0x3c00, /* 1 */
0xbc00, /* -1 */
0x3555, /*nearest value to 1/3*/
0x3bff, /*largest number less than one*/
0xc000, /* -2 */
0xfbff, /* -HALF_MAX */
0x8400, /* -HALF_MIN */
0x4248, /* M_PI_H */
0xc248, /* -M_PI_H */
0xbbff, /* Largest negative fraction */
};
// clang-format on
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
// (that's x87 default state). This causes problems with the tests that
// convert long and ulong to float and double or otherwise deal with values
// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
static inline void Force64BitFPUPrecision(void)
{
#if __MINGW32__
// The usual method is to use _controlfp as follows:
// #include <float.h>
// _controlfp(_PC_64, _MCW_PC);
//
// _controlfp is available on MinGW32 but not on MinGW64. Instead of having
// divergent code just use inline assembly which works for both.
unsigned short int orig_cw = 0;
unsigned short int new_cw = 0;
__asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
new_cw = orig_cw | 0x0300; // set precision to 64-bit
__asm__ __volatile__("fldcw %0" ::"m"(new_cw));
#else
/* Implement for other platforms if needed */
#endif
}
template <typename InType, typename OutType, bool InFP, bool OutFP>
int CalcRefValsPat<InType, OutType, InFP, OutFP>::check_result(void *test,
uint32_t count,
int vectorSize)
{
const cl_uchar *a = (const cl_uchar *)gAllowZ;
if (is_half<OutType, OutFP>())
{
const cl_half *t = (const cl_half *)test;
const cl_half *c = (const cl_half *)gRef;
for (uint32_t i = 0; i < count; i++)
if (t[i] != c[i] &&
// Allow nan's to be binary different
!((t[i] & 0x7fff) > 0x7C00 && (c[i] & 0x7fff) > 0x7C00)
&& !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x8000)))
{
vlog(
"\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
vectorSize, i, HTF(c[i]), HTF(t[i]));
return i + 1;
}
}
else if (std::is_integral<OutType>::value)
{ // char/uchar/short/ushort/half/int/uint/long/ulong
const OutType *t = (const OutType *)test;
const OutType *c = (const OutType *)gRef;
for (uint32_t i = 0; i < count; i++)
if (t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (OutType)0))
{
size_t s = sizeof(OutType) * 2;
std::stringstream sstr;
sstr << "\nError for vector size %d found at 0x%8.8x: *0x%"
<< s << "." << s << "x vs 0x%" << s << "." << s << "x\n";
vlog(sstr.str().c_str(), vectorSize, i, c[i], t[i]);
return i + 1;
}
}
else if (std::is_same<OutType, cl_float>::value)
{
// cast to integral - from original test
const cl_uint *t = (const cl_uint *)test;
const cl_uint *c = (const cl_uint *)gRef;
for (uint32_t i = 0; i < count; i++)
if (t[i] != c[i] &&
// Allow nan's to be binary different
!((t[i] & 0x7fffffffU) > 0x7f800000U
&& (c[i] & 0x7fffffffU) > 0x7f800000U)
&& !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x80000000U)))
{
vlog(
"\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]);
return i + 1;
}
}
else
{
const cl_ulong *t = (const cl_ulong *)test;
const cl_ulong *c = (const cl_ulong *)gRef;
for (uint32_t i = 0; i < count; i++)
if (t[i] != c[i] &&
// Allow nan's to be binary different
!((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL
&& (c[i] & 0x7fffffffffffffffULL) > 0x7f80000000000000ULL)
&& !(a[i] != (cl_uchar)0
&& t[i] == (c[i] & 0x8000000000000000ULL)))
{
vlog(
"\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]);
return i + 1;
}
}
return 0;
}
cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
{
if (0 == (x & (x - 1))) return x;
while (x & (x - 1)) x &= x - 1;
return x + x;
}
cl_int CustomConversionsTest::Run()
{
int startMinVectorSize = gMinVectorSize;
Type inType, outType;
RoundingMode round;
SaturationMode sat;
for (int i = 0; i < argCount; i++)
{
if (conv_test::GetTestCase(argList[i], &outType, &inType, &sat, &round))
{
vlog_error("\n\t\t**** ERROR: Unable to parse function name "
"%s. Skipping.... *****\n\n",
argList[i]);
continue;
}
// skip double if we don't have it
if (!gTestDouble && (inType == kdouble || outType == kdouble))
{
if (gHasDouble)
{
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
gTypeNames[outType], gSaturationNames[sat],
gRoundingModeNames[round], gTypeNames[inType]);
vlog("\t\tcl_khr_fp64 enabled, but double testing turned "
"off.\n");
}
continue;
}
// skip half if we don't have it
if (!gTestHalfs && (inType == khalf || outType == khalf))
{
if (gHasHalfs)
{
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
gTypeNames[outType], gSaturationNames[sat],
gRoundingModeNames[round], gTypeNames[inType]);
vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
"off.\n");
}
continue;
}
// skip longs on embedded
if (!gHasLong
&& (inType == klong || outType == klong || inType == kulong
|| outType == kulong))
{
continue;
}
// Skip the implicit converts if the rounding mode is not default or
// test is saturated
if (0 == startMinVectorSize)
{
if (sat || round != kDefaultRoundingMode)
gMinVectorSize = 1;
else
gMinVectorSize = 0;
}
IterOverSelectedTypes iter(typeIterator, *this, inType, outType, round,
sat);
iter.Run();
if (gFailCount)
{
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
gTypeNames[outType], gSaturationNames[sat],
gRoundingModeNames[round], gTypeNames[inType]);
}
}
return gFailCount;
}
ConversionsTest::ConversionsTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: context(context), device(device), queue(queue), num_elements(0),
typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0),
cl_uint(0), cl_int(0), cl_half(0), cl_float(0),
cl_double(0), cl_ulong(0), cl_long(0) })
{}
cl_int ConversionsTest::Run()
{
IterOverTypes iter(typeIterator, *this);
iter.Run();
return gFailCount;
}
cl_int ConversionsTest::SetUp(int elements)
{
num_elements = elements;
if (is_extension_available(device, "cl_khr_fp16"))
{
const cl_device_fp_config fpConfigHalf =
get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);
if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0)
{
DataInitInfo::halfRoundingMode = CL_HALF_RTE;
ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE;
}
else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0)
{
DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTZ;
}
else
{
log_error("Error while acquiring half rounding mode");
return TEST_FAIL;
}
}
return CL_SUCCESS;
}
template <typename InType, typename OutType, bool InFP, bool OutFP>
void ConversionsTest::TestTypesConversion(const Type &inType,
const Type &outType, int &testNumber,
int startMinVectorSize)
{
SaturationMode sat;
RoundingMode round;
int error;
// skip longs on embedded
if (!gHasLong
&& (inType == klong || outType == klong || inType == kulong
|| outType == kulong))
{
return;
}
for (sat = (SaturationMode)0; sat < kSaturationModeCount;
sat = (SaturationMode)(sat + 1))
{
// skip illegal saturated conversions to float type
if (kSaturated == sat
&& (outType == kfloat || outType == kdouble || outType == khalf))
{
continue;
}
for (round = (RoundingMode)0; round < kRoundingModeCount;
round = (RoundingMode)(round + 1))
{
if (++testNumber < gStartTestNumber)
{
continue;
}
else
{
if (gEndTestNumber > 0 && testNumber >= gEndTestNumber) return;
}
vlog("%d) Testing convert_%sn%s%s( %sn ):\n", testNumber,
gTypeNames[outType], gSaturationNames[sat],
gRoundingModeNames[round], gTypeNames[inType]);
// skip double if we don't have it
if (!gTestDouble && (inType == kdouble || outType == kdouble))
{
if (gHasDouble)
{
vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
"FAILED ** \n",
testNumber, gTypeNames[outType],
gSaturationNames[sat], gRoundingModeNames[round],
gTypeNames[inType]);
vlog("\t\tcl_khr_fp64 enabled, but double "
"testing turned off.\n");
}
continue;
}
// skip half if we don't have it
if (!gTestHalfs && (inType == khalf || outType == khalf))
{
if (gHasHalfs)
{
vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
gTypeNames[outType], gSaturationNames[sat],
gRoundingModeNames[round], gTypeNames[inType]);
vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
"off.\n");
}
continue;
}
// Skip the implicit converts if the rounding mode is
// not default or test is saturated
if (0 == startMinVectorSize)
{
if (sat || round != kDefaultRoundingMode)
gMinVectorSize = 1;
else
gMinVectorSize = 0;
}
if ((error = DoTest<InType, OutType, InFP, OutFP>(outType, inType,
sat, round)))
{
vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
"FAILED ** \n",
testNumber, gTypeNames[outType],
gSaturationNames[sat], gRoundingModeNames[round],
gTypeNames[inType]);
}
}
}
}
template <typename InType, typename OutType, bool InFP, bool OutFP>
int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
RoundingMode round)
{
#ifdef __APPLE__
cl_ulong wall_start = mach_absolute_time();
#endif
cl_uint threads = GetThreadCount();
DataInitInfo info = { 0, 0, outType, inType, sat, round, threads };
DataInfoSpec<InType, OutType, InFP, OutFP> init_info(info);
WriteInputBufferInfo writeInputBufferInfo;
int vectorSize;
int error = 0;
uint64_t i;
gTestCount++;
size_t blockCount =
BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]);
size_t step = blockCount;
for (i = 0; i < threads; i++)
{
init_info.mdv.emplace_back(MTdataHolder(gRandomSeed));
}
writeInputBufferInfo.outType = outType;
writeInputBufferInfo.inType = inType;
writeInputBufferInfo.calcInfo.resize(gMaxVectorSize);
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
{
writeInputBufferInfo.calcInfo[vectorSize].reset(
new CalcRefValsPat<InType, OutType, InFP, OutFP>());
writeInputBufferInfo.calcInfo[vectorSize]->program =
conv_test::MakeProgram(
outType, inType, sat, round, vectorSize,
&writeInputBufferInfo.calcInfo[vectorSize]->kernel);
if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->program)
{
gFailCount++;
return -1;
}
if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->kernel)
{
gFailCount++;
vlog_error("\t\tFAILED -- Failed to create kernel.\n");
return -2;
}
writeInputBufferInfo.calcInfo[vectorSize]->parent =
&writeInputBufferInfo;
writeInputBufferInfo.calcInfo[vectorSize]->vectorSize = vectorSize;
writeInputBufferInfo.calcInfo[vectorSize]->result = -1;
}
if (gSkipTesting) return error;
// Patch up rounding mode if default is RTZ
// We leave the part above in default rounding mode so that the right kernel
// is compiled.
if (std::is_same<OutType, cl_float>::value)
{
if (round == kDefaultRoundingMode && gIsRTZ)
init_info.round = round = kRoundTowardZero;
}
else if (std::is_same<OutType, cl_half>::value && OutFP)
{
if (round == kDefaultRoundingMode && gIsHalfRTZ)
init_info.round = round = kRoundTowardZero;
}
// Figure out how many elements are in a work block
// we handle 64-bit types a bit differently.
uint64_t lastCase = (8 * gTypeSizes[inType] > 32)
? 0x100000000ULL
: 1ULL << (8 * gTypeSizes[inType]);
if (!gWimpyMode && gIsEmbedded)
step = blockCount * EMBEDDED_REDUCTION_FACTOR;
if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
vlog("Testing... ");
fflush(stdout);
for (i = 0; i < (uint64_t)lastCase; i += step)
{
if (0 == (i & ((lastCase >> 3) - 1)))
{
vlog(".");
fflush(stdout);
}
cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i);
writeInputBufferInfo.count = count;
// Crate a user event to represent the status of the reference value
// computation completion
writeInputBufferInfo.calcReferenceValues =
clCreateUserEvent(gContext, &error);
if (error || NULL == writeInputBufferInfo.calcReferenceValues)
{
vlog_error("ERROR: Unable to create user event. (%d)\n", error);
gFailCount++;
return error;
}
// retain for consumption by MapOutputBufferComplete
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
vectorSize++)
{
if ((error =
clRetainEvent(writeInputBufferInfo.calcReferenceValues)))
{
vlog_error("ERROR: Unable to retain user event. (%d)\n", error);
gFailCount++;
return error;
}
}
// Crate a user event to represent when the callbacks are done verifying
// correctness
writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error);
if (error || NULL == writeInputBufferInfo.doneBarrier)
{
vlog_error("ERROR: Unable to create user event for barrier. (%d)\n",
error);
gFailCount++;
return error;
}
// retain for use by the callback that calls this
if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier)))
{
vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n",
error);
gFailCount++;
return error;
}
// Call this in a multithreaded manner
cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2;
init_info.start = i;
init_info.size = count / chunks;
if (init_info.size < 16384)
{
chunks = RoundUpToNextPowerOfTwo(threads);
init_info.size = count / chunks;
if (init_info.size < 16384)
{
init_info.size = count;
chunks = 1;
}
}
ThreadPool_Do(conv_test::InitData, chunks, &init_info);
// Copy the results to the device
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0,
count * gTypeSizes[inType], gIn, 0,
NULL, NULL)))
{
vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error);
gFailCount++;
return error;
}
// Call completion callback for the write, which will enqueue the rest
// of the work.
conv_test::WriteInputBufferComplete((void *)&writeInputBufferInfo);
// Make sure the work is actually running, so we don't deadlock
if ((error = clFlush(gQueue)))
{
vlog_error("clFlush failed with error %d\n", error);
gFailCount++;
return error;
}
ThreadPool_Do(conv_test::PrepareReference, chunks, &init_info);
// signal we are done calculating the reference results
if ((error = clSetUserEventStatus(
writeInputBufferInfo.calcReferenceValues, CL_COMPLETE)))
{
vlog_error(
"Error: Failed to set user event status to CL_COMPLETE: %d\n",
error);
gFailCount++;
return error;
}
// Wait for the event callbacks to finish verifying correctness.
if ((error = clWaitForEvents(
1, (cl_event *)&writeInputBufferInfo.doneBarrier)))
{
vlog_error("Error: Failed to wait for barrier: %d\n", error);
gFailCount++;
return error;
}
if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues)))
{
vlog_error("Error: Failed to release calcReferenceValues: %d\n",
error);
gFailCount++;
return error;
}
if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier)))
{
vlog_error("Error: Failed to release done barrier: %d\n", error);
gFailCount++;
return error;
}
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
vectorSize++)
{
if ((error = writeInputBufferInfo.calcInfo[vectorSize]->result))
{
switch (inType)
{
case kuchar:
case kchar:
vlog("Input value: 0x%2.2x ",
((unsigned char *)gIn)[error - 1]);
break;
case kushort:
case kshort:
vlog("Input value: 0x%4.4x ",
((unsigned short *)gIn)[error - 1]);
break;
case kuint:
case kint:
vlog("Input value: 0x%8.8x ",
((unsigned int *)gIn)[error - 1]);
break;
case khalf:
vlog("Input value: %a ",
HTF(((cl_half *)gIn)[error - 1]));
break;
case kfloat:
vlog("Input value: %a ", ((float *)gIn)[error - 1]);
break;
case kulong:
case klong:
vlog("Input value: 0x%16.16llx ",
((unsigned long long *)gIn)[error - 1]);
break;
case kdouble:
vlog("Input value: %a ", ((double *)gIn)[error - 1]);
break;
default:
vlog_error("Internal error at %s: %d\n", __FILE__,
__LINE__);
abort();
break;
}
// tell the user which conversion it was.
if (0 == vectorSize)
vlog(" (implicit scalar conversion from %s to %s)\n",
gTypeNames[inType], gTypeNames[outType]);
else
vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType],
sizeNames[vectorSize], gSaturationNames[sat],
gRoundingModeNames[round], gTypeNames[inType],
sizeNames[vectorSize]);
gFailCount++;
return error;
}
}
}
log_info("done.\n");
if (gTimeResults)
{
// Kick off tests for the various vector lengths
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
vectorSize++)
{
size_t workItemCount = blockCount / vectorSizes[vectorSize];
if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4)
workItemCount /=
4 / (vectorSizes[vectorSize] * gTypeSizes[outType]);
double sum = 0.0;
double bestTime = INFINITY;
cl_uint k;
for (k = 0; k < PERF_LOOP_COUNT; k++)
{
uint64_t startTime = conv_test::GetTime();
if ((error = conv_test::RunKernel(
writeInputBufferInfo.calcInfo[vectorSize]->kernel,
gInBuffer, gOutBuffers[vectorSize], workItemCount)))
{
gFailCount++;
return error;
}
// Make sure OpenCL is done
if ((error = clFinish(gQueue)))
{
vlog_error("Error %d at clFinish\n", error);
return error;
}
uint64_t endTime = conv_test::GetTime();
double time = SubtractTime(endTime, startTime);
sum += time;
if (time < bestTime) bestTime = time;
}
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
double clocksPerOp = bestTime * (double)gDeviceFrequency
* gComputeDevices * gSimdSize * 1e6
/ (workItemCount * vectorSizes[vectorSize]);
if (0 == vectorSize)
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
"implicit convert %s -> %s", gTypeNames[inType],
gTypeNames[outType]);
else
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
"convert_%s%s%s%s( %s%s )", gTypeNames[outType],
sizeNames[vectorSize], gSaturationNames[sat],
gRoundingModeNames[round], gTypeNames[inType],
sizeNames[vectorSize]);
}
}
if (gWimpyMode)
vlog("\tWimp pass");
else
vlog("\tpassed");
#ifdef __APPLE__
// record the run time
vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start));
#endif
vlog("\n\n");
fflush(stdout);
return error;
}
#if !defined(__APPLE__)
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
#endif
#if defined(_MSC_VER)
/* function is defined in "compat.h" */
#else
double SubtractTime(uint64_t endTime, uint64_t startTime)
{
uint64_t diff = endTime - startTime;
static double conversion = 0.0;
if (0.0 == conversion)
{
#if defined(__APPLE__)
mach_timebase_info_data_t info = { 0, 0 };
kern_return_t err = mach_timebase_info(&info);
if (0 == err)
conversion = 1e-9 * (double)info.numer / (double)info.denom;
#else
// This function consumes output from GetTime() above, and converts the
// time to secionds.
#warning need accurate ticks to seconds conversion factor here. Times are invalid.
#endif
}
// strictly speaking we should also be subtracting out timer latency here
return conversion * (double)diff;
}
#endif
void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &ptr);
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
void *data);
// Note: May be called reentrantly
void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
{
cl_int status;
// CalcRefValsBase *info = (CalcRefValsBase *)data;
cl_event calcReferenceValues = info->parent->calcReferenceValues;
// we know that the map is done, wait for the main thread to finish
// calculating the reference values
if ((status =
clSetEventCallback(calcReferenceValues, CL_COMPLETE,
CalcReferenceValuesComplete, (void *)&info)))
{
vlog_error("ERROR: clSetEventCallback failed in "
"MapResultValuesComplete with status: %d\n",
status);
gFailCount++; // not thread safe -- being lazy here
}
// this thread no longer needs its reference to info->calcReferenceValues,
// so release it
if ((status = clReleaseEvent(calcReferenceValues)))
{
vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed "
"with status: %d\n",
status);
gFailCount++; // not thread safe -- being lazy here
}
// no need to flush since we didn't enqueue anything
// e was already released by WriteInputBufferComplete. It should be
// destroyed automatically soon after we exit.
}
template <typename T> static bool isnan_fp(const T &v)
{
if (std::is_same<T, cl_half>::value)
{
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = ((cl_half)v) & 0x3FF;
return (h_exp == 0x1F && h_mant != 0);
}
else
{
#if !defined(_WIN32)
return std::isnan(v);
#else
return _isnan(v);
#endif
}
}
template <typename InType>
void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType, void *input)
{
InType *inp = (InType *)input;
for (auto j = 0; j < count; j++)
{
if (isnan_fp<InType>(inp[j]))
memset((char *)mapped + j * gTypeSizes[outType], 0,
gTypeSizes[outType]);
}
}
template <typename InType, typename OutType>
void FixNanToFltConversions(InType *inp, OutType *outp, cl_uint count)
{
if (std::is_same<OutType, cl_half>::value)
{
for (auto j = 0; j < count; j++)
if (isnan_fp(inp[j]) && isnan_fp(outp[j]))
outp[j] = 0x7e00; // HALF_NAN
}
else
{
for (auto j = 0; j < count; j++)
if (isnan_fp(inp[j]) && isnan_fp(outp[j])) outp[j] = NAN;
}
}
void FixNanConversions(Type outType, Type inType, void *d, cl_uint count,
void *inp)
{
if (outType != kfloat && outType != kdouble && outType != khalf)
{
if (inType == kfloat)
ZeroNanToIntCases<float>(count, d, outType, inp);
else if (inType == kdouble)
ZeroNanToIntCases<double>(count, d, outType, inp);
else if (inType == khalf)
ZeroNanToIntCases<cl_half>(count, d, outType, inp);
}
else if (inType == kfloat || inType == kdouble || inType == khalf)
{
// outtype and intype is float or double or half. NaN conversions for
// float/double/half could be any NaN
if (inType == kfloat)
{
float *inp = (float *)gIn;
if (outType == kdouble)
{
double *outp = (double *)d;
FixNanToFltConversions(inp, outp, count);
}
else if (outType == khalf)
{
cl_half *outp = (cl_half *)d;
FixNanToFltConversions(inp, outp, count);
}
}
else if (inType == kdouble)
{
double *inp = (double *)gIn;
if (outType == kfloat)
{
float *outp = (float *)d;
FixNanToFltConversions(inp, outp, count);
}
else if (outType == khalf)
{
cl_half *outp = (cl_half *)d;
FixNanToFltConversions(inp, outp, count);
}
}
else if (inType == khalf)
{
cl_half *inp = (cl_half *)gIn;
if (outType == kfloat)
{
float *outp = (float *)d;
FixNanToFltConversions(inp, outp, count);
}
else if (outType == kdouble)
{
double *outp = (double *)d;
FixNanToFltConversions(inp, outp, count);
}
}
}
}
void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
void *data)
{
std::unique_ptr<CalcRefValsBase> &info =
*(std::unique_ptr<CalcRefValsBase> *)data;
cl_uint vectorSize = info->vectorSize;
cl_uint count = info->parent->count;
Type outType =
info->parent->outType; // the data type of the conversion result
Type inType = info->parent->inType; // the data type of the conversion input
cl_int error;
cl_event doneBarrier = info->parent->doneBarrier;
// report spurious error condition
if (CL_SUCCESS != status)
{
vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! (%d)\n",
status);
gFailCount++; // lazy about thread safety here
return;
}
// Now we know that both results have been mapped back from the device, and
// the main thread is done calculating the reference results. It is now time
// to check the results.
// verify results
void *mapped = info->p;
// Patch up NaNs conversions to integer to zero -- these can be converted to
// any integer
FixNanConversions(outType, inType, mapped, count, gIn);
if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
info->result =
info->check_result(mapped, count, vectorSizes[vectorSize]);
else
info->result = 0;
// Fill the output buffer with junk and release it
{
cl_uint pattern = 0xffffdead;
memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]);
if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize],
mapped, 0, NULL, NULL)))
{
vlog_error("ERROR: clEnqueueUnmapMemObject failed in "
"CalcReferenceValuesComplete (%d)\n",
error);
gFailCount++;
}
}
if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1))
{
if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE)))
{
vlog_error("ERROR: clSetUserEventStatus failed in "
"CalcReferenceValuesComplete (err: %d). We're probably "
"going to deadlock.\n",
status);
gFailCount++;
return;
}
if ((status = clReleaseEvent(doneBarrier)))
{
vlog_error("ERROR: clReleaseEvent failed in "
"CalcReferenceValuesComplete (err: %d).\n",
status);
gFailCount++;
return;
}
}
// e was already released by WriteInputBufferComplete. It should be
// destroyed automatically soon after all the calls to
// CalcReferenceValuesComplete exit.
}
namespace conv_test {
cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
{
DataInitBase *info = (DataInitBase *)p;
info->init(job_id, thread_id);
return CL_SUCCESS;
}
cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
{
DataInitBase *info = (DataInitBase *)p;
cl_uint count = info->size;
Type inType = info->inType;
Type outType = info->outType;
RoundingMode round = info->round;
Force64BitFPUPrecision();
void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType];
void *a = (cl_uchar *)gAllowZ + job_id * count;
void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
if (outType != inType)
{
// create the reference while we wait
#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
/* ARM VFP doesn't have hardware instruction for converting from 64-bit
* integer to float types, hence GCC ARM uses the floating-point
* emulation code despite which -mfloat-abi setting it is. But the
* emulation code in libgcc.a has only one rounding mode (round to
* nearest even in this case) and ignores the user rounding mode setting
* in hardware. As a result setting rounding modes in hardware won't
* give correct rounding results for type covert from 64-bit integer to
* float using GCC for ARM compiler so for testing different rounding
* modes, we need to use alternative reference function. ARM64 does have
* an instruction, however we cannot guarantee the compiler will use it.
* On all ARM architechures use emulation to calculate reference.*/
switch (round)
{
/* conversions to floating-point type use the current rounding mode.
* The only default floating-point rounding mode supported is round
* to nearest even i.e the current rounding mode will be _rte for
* floating-point types. */
case kDefaultRoundingMode: qcom_rm = qcomRTE; break;
case kRoundToNearestEven: qcom_rm = qcomRTE; break;
case kRoundUp: qcom_rm = qcomRTP; break;
case kRoundDown: qcom_rm = qcomRTN; break;
case kRoundTowardZero: qcom_rm = qcomRTZ; break;
default:
vlog_error("ERROR: undefined rounding mode %d\n", round);
break;
}
qcom_sat = info->sat;
#endif
RoundingMode oldRound;
if (outType == khalf)
{
oldRound = set_round(kRoundToNearestEven, kfloat);
switch (round)
{
default:
case kDefaultRoundingMode:
DataInitInfo::halfRoundingMode =
ConversionsTest::defaultHalfRoundingMode;
break;
case kRoundToNearestEven:
DataInitInfo::halfRoundingMode = CL_HALF_RTE;
break;
case kRoundUp:
DataInitInfo::halfRoundingMode = CL_HALF_RTP;
break;
case kRoundDown:
DataInitInfo::halfRoundingMode = CL_HALF_RTN;
break;
case kRoundTowardZero:
DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
break;
}
}
else
oldRound = set_round(round, outType);
if (info->sat)
info->conv_array_sat(d, s, count);
else
info->conv_array(d, s, count);
set_round(oldRound, outType);
// Decide if we allow a zero result in addition to the correctly rounded
// one
memset(a, 0, count);
if (gForceFTZ && (inType == kfloat || outType == kfloat))
{
info->set_allow_zero_array((uint8_t *)a, d, s, count);
}
if (gForceHalfFTZ && (inType == khalf || outType == khalf))
{
info->set_allow_zero_array((uint8_t *)a, d, s, count);
}
}
else
{
// Copy the input to the reference
memcpy(d, s, info->size * gTypeSizes[inType]);
}
// Patch up NaNs conversions to integer to zero -- these can be converted to
// any integer
FixNanConversions(outType, inType, d, count, s);
return CL_SUCCESS;
}
uint64_t GetTime(void)
{
#if defined(__APPLE__)
return mach_absolute_time();
#elif defined(_MSC_VER)
return ReadTime();
#else
// mach_absolute_time is a high precision timer with precision < 1
// microsecond.
#warning need accurate clock here. Times are invalid.
return 0;
#endif
}
// Note: not called reentrantly
void WriteInputBufferComplete(void *data)
{
cl_int status;
WriteInputBufferInfo *info = (WriteInputBufferInfo *)data;
cl_uint count = info->count;
int vectorSize;
info->barrierCount = gMaxVectorSize - gMinVectorSize;
// now that we know that the write buffer is complete, enqueue callbacks to
// wait for the main thread to finish calculating the reference results.
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
{
size_t workItemCount =
(count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]);
if ((status = conv_test::RunKernel(info->calcInfo[vectorSize]->kernel,
gInBuffer, gOutBuffers[vectorSize],
workItemCount)))
{
gFailCount++;
return;
}
info->calcInfo[vectorSize]->p = clEnqueueMapBuffer(
gQueue, gOutBuffers[vectorSize], CL_TRUE,
CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0,
NULL, NULL, &status);
{
if (status)
{
vlog_error("ERROR: WriteInputBufferComplete calback failed "
"with status: %d\n",
status);
gFailCount++;
return;
}
}
}
for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
{
MapResultValuesComplete(info->calcInfo[vectorSize]);
}
// Make sure the work starts moving -- otherwise we may deadlock
if ((status = clFlush(gQueue)))
{
vlog_error(
"ERROR: WriteInputBufferComplete calback failed with status: %d\n",
status);
gFailCount++;
return;
}
// e was already released by the main thread. It should be destroyed
// automatically soon after we exit.
}
cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
RoundingMode round, int vectorSize, cl_kernel *outKernel)
{
cl_program program;
char testName[256];
int error = 0;
std::ostringstream source;
if (outType == kdouble || inType == kdouble)
source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
if (outType == khalf || inType == khalf)
source << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
// Create the program. This is a bit complicated because we are trying to
// avoid byte and short stores.
if (0 == vectorSize)
{
// Create the type names.
char inName[32];
char outName[32];
strncpy(inName, gTypeNames[inType], sizeof(inName));
strncpy(outName, gTypeNames[outType], sizeof(outName));
sprintf(testName, "test_implicit_%s_%s", outName, inName);
source << "__kernel void " << testName << "( __global " << inName
<< " *src, __global " << outName << " *dest )\n";
source << "{\n";
source << " size_t i = get_global_id(0);\n";
source << " dest[i] = src[i];\n";
source << "}\n";
vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType],
gTypeNames[outType]);
fflush(stdout);
}
else
{
int vectorSizetmp = vectorSizes[vectorSize];
// Create the type names.
char convertString[128];
char inName[32];
char outName[32];
switch (vectorSizetmp)
{
case 1:
strncpy(inName, gTypeNames[inType], sizeof(inName));
strncpy(outName, gTypeNames[outType], sizeof(outName));
snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
outName, gSaturationNames[sat],
gRoundingModeNames[round]);
snprintf(testName, 256, "test_%s_%s", convertString, inName);
vlog("Building %s( %s ) test\n", convertString, inName);
break;
case 3:
strncpy(inName, gTypeNames[inType], sizeof(inName));
strncpy(outName, gTypeNames[outType], sizeof(outName));
snprintf(convertString, sizeof(convertString),
"convert_%s3%s%s", outName, gSaturationNames[sat],
gRoundingModeNames[round]);
snprintf(testName, 256, "test_%s_%s3", convertString, inName);
vlog("Building %s( %s3 ) test\n", convertString, inName);
break;
default:
snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType],
vectorSizetmp);
snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType],
vectorSizetmp);
snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
outName, gSaturationNames[sat],
gRoundingModeNames[round]);
snprintf(testName, 256, "test_%s_%s", convertString, inName);
vlog("Building %s( %s ) test\n", convertString, inName);
break;
}
fflush(stdout);
if (vectorSizetmp == 3)
{
source << "__kernel void " << testName << "( __global " << inName
<< " *src, __global " << outName << " *dest )\n";
source << "{\n";
source << " size_t i = get_global_id(0);\n";
source << " if( i + 1 < get_global_size(0))\n";
source << " vstore3( " << convertString
<< "( vload3( i, src)), i, dest );\n";
source << " else\n";
source << " {\n";
source << " " << inName << "3 in;\n";
source << " " << outName << "3 out;\n";
source << " if( 0 == (i & 1) )\n";
source << " in.y = src[3*i+1];\n";
source << " in.x = src[3*i];\n";
source << " out = " << convertString << "( in ); \n";
source << " dest[3*i] = out.x;\n";
source << " if( 0 == (i & 1) )\n";
source << " dest[3*i+1] = out.y;\n";
source << " }\n";
source << "}\n";
}
else
{
source << "__kernel void " << testName << "( __global " << inName
<< " *src, __global " << outName << " *dest )\n";
source << "{\n";
source << " size_t i = get_global_id(0);\n";
source << " dest[i] = " << convertString << "( src[i] );\n";
source << "}\n";
}
}
*outKernel = NULL;
const char *flags = NULL;
if (gForceFTZ || gForceHalfFTZ) flags = "-cl-denorms-are-zero";
// build it
std::string sourceString = source.str();
const char *programSource = sourceString.c_str();
error = create_single_kernel_helper(gContext, &program, outKernel, 1,
&programSource, testName, flags);
if (error)
{
vlog_error("Failed to build kernel/program (err = %d).\n", error);
return NULL;
}
return program;
}
//
int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount)
{
// The global dimensions are just the blockCount to execute since we haven't
// set up multiple queues for multiple devices.
int error;
error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf);
error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf);
if (error)
{
vlog_error("FAILED -- could not set kernel args (%d)\n", error);
return error;
}
if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount,
NULL, 0, NULL, NULL)))
{
vlog_error("FAILED -- could not execute kernel (%d)\n", error);
return error;
}
return 0;
}
int GetTestCase(const char *name, Type *outType, Type *inType,
SaturationMode *sat, RoundingMode *round)
{
int i;
// Find the return type
for (i = 0; i < kTypeCount; i++)
if (name == strstr(name, gTypeNames[i]))
{
*outType = (Type)i;
name += strlen(gTypeNames[i]);
break;
}
if (i == kTypeCount) return -1;
// Check to see if _sat appears next
*sat = (SaturationMode)0;
for (i = 1; i < kSaturationModeCount; i++)
if (name == strstr(name, gSaturationNames[i]))
{
*sat = (SaturationMode)i;
name += strlen(gSaturationNames[i]);
break;
}
*round = (RoundingMode)0;
for (i = 1; i < kRoundingModeCount; i++)
if (name == strstr(name, gRoundingModeNames[i]))
{
*round = (RoundingMode)i;
name += strlen(gRoundingModeNames[i]);
break;
}
if (*name != '_') return -2;
name++;
for (i = 0; i < kTypeCount; i++)
if (name == strstr(name, gTypeNames[i]))
{
*inType = (Type)i;
name += strlen(gTypeNames[i]);
break;
}
if (i == kTypeCount) return -3;
if (*name != '\0') return -4;
return 0;
}
} // namespace conv_test