mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-20 14:39:01 +00:00
* Improve Functionality of Harness In the harness we previously were able to determine whether or not a device supports the half or double data types, but doing so required unintuitive function calls and would need to be repeated per test. A new pair of functions has been added which clearly state what they do and make it easier to determine whether or not a device supports the types. Signed-off-by: Chetankumar Mistry <chetan.mistry@arm.com> * Remove Old GetKernelArgInfo Tests (#522) In the API test suite we have 2 versions which test the clGetKernelArgInfo API. As part of this ticket we are redesigning the implementation of this test. This change removes all of the old code and makes it so that the tests simply pass. A later commit will add the redesigned test. Signed-off-by: Chetankumar Mistry <chetan.mistry@arm.com> * Redesign GetKernelArgInfo (#522) The previous test for this API consisted of 5K+ lines of code which would define the test kernels and the expected outputs from this API. This redesign instead generates the kernels and expected outputs, leading to increased maintainability and a significantly reduced line-of-code count. Signed-off-by: Chetankumar Mistry <chetan.mistry@arm.com> * [SQUASH] Address Review Comments This commit does the following: 1) Update the Copyright to 2021 2) Fixes a typo in a comment 3) Explicitly declares a vector variable (previously auto) 4) Output subtest result after completion rather than all of them at the end Signed-off-by: Chetankumar Mistry <chetan.mistry@arm.com> * [SQUASH] Ensure Kernel Arguments do not exceed CL_DEVICE_MAX_PARAMETER_SIZE As per upstream comments, this change ensures that the total size of parameters passed into a kernel does not exceed the limit specified by CL_DEVICE_MAX_PARAMETER_SIZE for the device used. Additionally this change replaces ASSERT_SUCCESS() with test_error() as per upstream requests. 
Signed-off-by: Chetankumar Mistry <chetan.mistry@arm.com> * [SQUASH] Address Image and Vector Failures This change aligns vector 3 types to be sized 4. Additionally it ensures that image arguments do not have the address space qualifier specified because they are by default in the __global space. Signed-off-by: Chetankumar Mistry <chetan.mistry@arm.com> * [SQUASH] Ensure that the size of pipe arguments is correct As mentioned in PR comments, the test previously assumed that sizeof(char) == sizeof(pipe char). The Clang implementation treats a pipe to take the same size as a pointer, which is now reflected in the code. Signed-off-by: Chetankumar Mistry <chetan.mistry@arm.com> * [SQUASH] Ensure that CL_DEVICE_MAX_PIPE_ARGS is not Exceeded This commit refactors the code so that Pipes are handled separately. Additionally, it removes signed char and char signed as scalar types to test and removes some redundant code for modifying the expected type when processing unsigned scalar types. Signed-off-by: Chetankumar Mistry <chetan.mistry@arm.com> * [SQUASH] Remove compatibility test from skip-list There is a list of tests which should be skipped when using an offline compiler. As get_kernel_arg_compatibility has been removed, it should also be removed here. Signed-off-by: Chetankumar Mistry <chetan.mistry@arm.com> * [SQUASH] Disable Pipe Tests This change disables the Pipe tests for clGetKernelArgInfo as pipe metadata is not accurately reported on clang which leads to the pipe tests failing. Signed-off-by: Chetankumar Mistry <chetan.mistry@arm.com>
711 lines
25 KiB
C++
711 lines
25 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "compat.h"
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "errorHelpers.h"
|
|
|
|
#include "parseParameters.h"
|
|
|
|
#include <CL/cl_half.h>
|
|
|
|
const char *IGetErrorString(int clErrorCode)
|
|
{
|
|
switch (clErrorCode)
|
|
{
|
|
case CL_SUCCESS: return "CL_SUCCESS";
|
|
case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
|
|
case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
|
|
case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
|
|
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
|
|
return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
|
case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
|
|
case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
|
|
case CL_PROFILING_INFO_NOT_AVAILABLE:
|
|
return "CL_PROFILING_INFO_NOT_AVAILABLE";
|
|
case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
|
|
case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
|
|
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
|
|
return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
|
|
case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
|
|
case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
|
|
case CL_MISALIGNED_SUB_BUFFER_OFFSET:
|
|
return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
|
|
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
|
|
return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
|
|
case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
|
|
case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
|
|
case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
|
|
case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
|
|
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
|
|
return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
|
|
case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
|
|
case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
|
|
case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
|
|
case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
|
|
case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
|
|
case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
|
|
case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
|
|
case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
|
|
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
|
|
return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
|
|
case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
|
|
case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
|
|
case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
|
|
case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
|
|
case CL_INVALID_PLATFORM: return "CL_INVALID_PLATFORM";
|
|
case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
|
|
case CL_INVALID_PROGRAM_EXECUTABLE:
|
|
return "CL_INVALID_PROGRAM_EXECUTABLE";
|
|
case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
|
|
case CL_INVALID_KERNEL_DEFINITION:
|
|
return "CL_INVALID_KERNEL_DEFINITION";
|
|
case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
|
|
case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
|
|
case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
|
|
case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
|
|
case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
|
|
case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
|
|
case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
|
|
case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
|
|
case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
|
|
case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
|
|
case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
|
|
case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
|
|
case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
|
|
case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
|
|
case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
|
|
case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
|
|
case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
|
|
case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR";
|
|
case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS";
|
|
case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS";
|
|
case CL_INVALID_DEVICE_PARTITION_COUNT:
|
|
return "CL_INVALID_DEVICE_PARTITION_COUNT";
|
|
case CL_INVALID_PIPE_SIZE: return "CL_INVALID_PIPE_SIZE";
|
|
case CL_INVALID_DEVICE_QUEUE: return "CL_INVALID_DEVICE_QUEUE";
|
|
case CL_INVALID_SPEC_ID: return "CL_INVALID_SPEC_ID";
|
|
case CL_MAX_SIZE_RESTRICTION_EXCEEDED:
|
|
return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";
|
|
default: return "(unknown)";
|
|
}
|
|
}
|
|
|
|
// Returns the symbolic name of an image channel order as a string literal,
// or NULL when the order is not one of the values listed below. The Apple
// specific orders are only recognised when their constants are defined.
const char *GetChannelOrderName(cl_channel_order order)
{
    const char *orderName = NULL;

    switch (order)
    {
        case CL_R: orderName = "CL_R"; break;
        case CL_A: orderName = "CL_A"; break;
        case CL_Rx: orderName = "CL_Rx"; break;
        case CL_RG: orderName = "CL_RG"; break;
        case CL_RA: orderName = "CL_RA"; break;
        case CL_RGx: orderName = "CL_RGx"; break;
        case CL_RGB: orderName = "CL_RGB"; break;
        case CL_RGBx: orderName = "CL_RGBx"; break;
        case CL_RGBA: orderName = "CL_RGBA"; break;
        case CL_ARGB: orderName = "CL_ARGB"; break;
        case CL_BGRA: orderName = "CL_BGRA"; break;
        case CL_INTENSITY: orderName = "CL_INTENSITY"; break;
        case CL_LUMINANCE: orderName = "CL_LUMINANCE"; break;
#if defined CL_1RGB_APPLE
        case CL_1RGB_APPLE: orderName = "CL_1RGB_APPLE"; break;
#endif
#if defined CL_BGR1_APPLE
        case CL_BGR1_APPLE: orderName = "CL_BGR1_APPLE"; break;
#endif
#if defined CL_ABGR_APPLE
        case CL_ABGR_APPLE: orderName = "CL_ABGR_APPLE"; break;
#endif
        case CL_DEPTH: orderName = "CL_DEPTH"; break;
        case CL_DEPTH_STENCIL: orderName = "CL_DEPTH_STENCIL"; break;
        case CL_sRGB: orderName = "CL_sRGB"; break;
        case CL_sRGBA: orderName = "CL_sRGBA"; break;
        case CL_sRGBx: orderName = "CL_sRGBx"; break;
        case CL_sBGRA: orderName = "CL_sBGRA"; break;
        case CL_ABGR: orderName = "CL_ABGR"; break;
        default: break; // unrecognised: orderName stays NULL
    }

    return orderName;
}
|
|
|
|
// Returns 1 when `order` is one of the channel orders listed below and 0 for
// every other value. CL_DEPTH_STENCIL is not in the list, so it yields 0.
// The Apple specific orders are only accepted when their constants are
// defined.
int IsChannelOrderSupported(cl_channel_order order)
{
    int isListed = 0;

    switch (order)
    {
        case CL_R:
        case CL_A:
        case CL_Rx:
        case CL_RG:
        case CL_RA:
        case CL_RGx:
        case CL_RGB:
        case CL_RGBx:
        case CL_RGBA:
        case CL_ARGB:
        case CL_BGRA:
        case CL_INTENSITY:
        case CL_LUMINANCE:
        case CL_ABGR:
        case CL_sRGB:
        case CL_sRGBx:
        case CL_sBGRA:
        case CL_sRGBA:
        case CL_DEPTH:
#if defined CL_1RGB_APPLE
        case CL_1RGB_APPLE:
#endif
#if defined CL_BGR1_APPLE
        case CL_BGR1_APPLE:
#endif
            isListed = 1;
            break;
        default: break;
    }

    return isListed;
}
|
|
|
|
// Expands to a switch case that returns the spelling of the given constant
// (the `#` operator stringifies the argument exactly as written).
#define CHANNEL_TYPE_NAME_CASE(value)                                          \
    case value: return #value

// Returns the symbolic name of an image channel data type as a string
// literal, or NULL when the type is not one of the values listed below.
// CL_SFIXED14_APPLE is only recognised when that constant is defined.
const char *GetChannelTypeName(cl_channel_type type)
{
    switch (type)
    {
        CHANNEL_TYPE_NAME_CASE(CL_SNORM_INT8);
        CHANNEL_TYPE_NAME_CASE(CL_SNORM_INT16);
        CHANNEL_TYPE_NAME_CASE(CL_UNORM_INT8);
        CHANNEL_TYPE_NAME_CASE(CL_UNORM_INT16);
        CHANNEL_TYPE_NAME_CASE(CL_UNORM_SHORT_565);
        CHANNEL_TYPE_NAME_CASE(CL_UNORM_SHORT_555);
        CHANNEL_TYPE_NAME_CASE(CL_UNORM_INT_101010);
        CHANNEL_TYPE_NAME_CASE(CL_SIGNED_INT8);
        CHANNEL_TYPE_NAME_CASE(CL_SIGNED_INT16);
        CHANNEL_TYPE_NAME_CASE(CL_SIGNED_INT32);
        CHANNEL_TYPE_NAME_CASE(CL_UNSIGNED_INT8);
        CHANNEL_TYPE_NAME_CASE(CL_UNSIGNED_INT16);
        CHANNEL_TYPE_NAME_CASE(CL_UNSIGNED_INT32);
        CHANNEL_TYPE_NAME_CASE(CL_HALF_FLOAT);
        CHANNEL_TYPE_NAME_CASE(CL_FLOAT);
#ifdef CL_SFIXED14_APPLE
        CHANNEL_TYPE_NAME_CASE(CL_SFIXED14_APPLE);
#endif
        CHANNEL_TYPE_NAME_CASE(CL_UNORM_INT24);
        default: break;
    }

    // Any type not listed above
    return NULL;
}

#undef CHANNEL_TYPE_NAME_CASE
|
|
|
|
// Returns 1 when `type` is one of the channel data types listed below and 0
// for every other value. CL_SFIXED14_APPLE is only accepted when that
// constant is defined.
int IsChannelTypeSupported(cl_channel_type type)
{
    int isListed = 0;

    switch (type)
    {
        case CL_SNORM_INT8:
        case CL_SNORM_INT16:
        case CL_UNORM_INT8:
        case CL_UNORM_INT16:
        case CL_UNORM_INT24:
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
        case CL_UNORM_INT_101010:
        case CL_SIGNED_INT8:
        case CL_SIGNED_INT16:
        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT8:
        case CL_UNSIGNED_INT16:
        case CL_UNSIGNED_INT32:
        case CL_HALF_FLOAT:
        case CL_FLOAT:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            isListed = 1;
            break;
        default: break;
    }

    return isListed;
}
|
|
|
|
// Returns the symbolic name of a sampler addressing mode as a string
// literal, or NULL when the mode is not one of the five values tested below.
const char *GetAddressModeName(cl_addressing_mode mode)
{
    if (mode == CL_ADDRESS_NONE) return "CL_ADDRESS_NONE";
    if (mode == CL_ADDRESS_CLAMP_TO_EDGE) return "CL_ADDRESS_CLAMP_TO_EDGE";
    if (mode == CL_ADDRESS_CLAMP) return "CL_ADDRESS_CLAMP";
    if (mode == CL_ADDRESS_REPEAT) return "CL_ADDRESS_REPEAT";
    if (mode == CL_ADDRESS_MIRRORED_REPEAT)
        return "CL_ADDRESS_MIRRORED_REPEAT";

    return NULL;
}
|
|
|
|
// Returns the symbolic name of a device type as a string literal. Only an
// exact match against one of the four table entries is named; any other
// value yields NULL.
const char *GetDeviceTypeName(cl_device_type type)
{
    static const struct
    {
        cl_device_type value;
        const char *name;
    } knownTypes[] = {
        { CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU" },
        { CL_DEVICE_TYPE_CPU, "CL_DEVICE_TYPE_CPU" },
        { CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR" },
        { CL_DEVICE_TYPE_ALL, "CL_DEVICE_TYPE_ALL" },
    };
    const size_t knownCount = sizeof(knownTypes) / sizeof(knownTypes[0]);

    for (size_t idx = 0; idx < knownCount; ++idx)
    {
        if (knownTypes[idx].value == type) return knownTypes[idx].name;
    }

    return NULL;
}
|
|
|
|
// Formats a vector of raw elements as lowercase hexadecimal text.
//
// dataBuffer: vecSize consecutive elements of typeSize bytes each.
// typeSize:   size of one element in bytes.
// vecSize:    number of elements.
// buffer:     destination for the text, or NULL to use an internal static
//             buffer (whose contents are replaced by the next such call).
//
// Elements are separated by a single space. Within an element the bytes are
// printed from the highest address down to the lowest, two hex digits each.
// Returns the buffer that was written to.
//
// A caller-supplied buffer must hold vecSize * (2 * typeSize + 1) bytes; its
// capacity is not known here and therefore cannot be checked. When the
// internal buffer is used the output is truncated to fit it instead of
// writing past its end.
const char *GetDataVectorString(void *dataBuffer, size_t typeSize,
                                size_t vecSize, char *buffer)
{
    static const char hexDigits[] = "0123456789abcdef";
    static char scratch[1024];

    // `limit` marks the last position that must stay free for the
    // terminator; it is only set when the capacity is actually known.
    const char *limit = NULL;
    if (buffer == NULL)
    {
        buffer = scratch;
        limit = scratch + sizeof(scratch) - 1;
    }

    const unsigned char *element = (const unsigned char *)dataBuffer;
    char *out = buffer;
    int outOfRoom = 0;

    for (size_t i = 0; i < vecSize && !outOfRoom; i++)
    {
        if (i > 0)
        {
            // Only emit a separator if at least one byte can follow it
            if (limit != NULL && limit - out < 3) break;
            *out++ = ' ';
        }

        for (size_t j = 0; j < typeSize; j++)
        {
            if (limit != NULL && limit - out < 2)
            {
                outOfRoom = 1;
                break;
            }
            const unsigned char value = element[typeSize - j - 1];
            *out++ = hexDigits[value >> 4];
            *out++ = hexDigits[value & 0x0f];
        }

        element += typeSize;
    }

    *out = 0;
    return buffer;
}
|
|
|
|
// Returns the symbolic name of a single command-queue property value as a
// string literal. Only exact matches against the four values below are
// named; anything else yields "(unknown)".
const char *GetQueuePropertyName(cl_command_queue_properties property)
{
    const char *propertyName = "(unknown)";

    switch (property)
    {
        case CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE:
            propertyName = "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE";
            break;
        case CL_QUEUE_PROFILING_ENABLE:
            propertyName = "CL_QUEUE_PROFILING_ENABLE";
            break;
        case CL_QUEUE_ON_DEVICE: propertyName = "CL_QUEUE_ON_DEVICE"; break;
        case CL_QUEUE_ON_DEVICE_DEFAULT:
            propertyName = "CL_QUEUE_ON_DEVICE_DEFAULT";
            break;
        default: break;
    }

    return propertyName;
}
|
|
|
|
// Larger of two values. Defined only if no MAX macro exists yet. Each
// argument appears twice in the expansion, so arguments with side effects
// are evaluated more than once.
#if !defined(MAX)
#define MAX(lhs, rhs) ((lhs) > (rhs) ? (lhs) : (rhs))
#endif

// With the Microsoft compiler the scalbn family is mapped onto the ldexp
// family (presumably because older MSVC runtimes lacked scalbn - confirm).
#if defined(_MSC_VER)
#define scalbnf(value, power) ldexpf(value, power)
#define scalbn(value, power) ldexp(value, power)
#define scalbnl(value, power) ldexpl(value, power)
#endif
|
|
|
|
// taken from math tests
#define HALF_MIN_EXP (-13)
#define HALF_MANT_DIG 11

// Returns the signed difference (test - reference) expressed in units of the
// half-precision ulp at `reference`.
//
// test:      value under test, already converted to float.
// reference: expected value in double precision.
//
// Special cases:
//  - infinite reference: 0 when test equals it, otherwise the raw difference;
//  - infinite test with a finite reference: test is treated as +/-2**16;
//  - NaN reference and NaN test: 0;
//  - the ulp never gets smaller than the one used for exponents of
//    HALF_MIN_EXP - 1 and below.
//
// Note: this function presumes that the caller has already checked whether
// the result is correctly rounded, i.e.
//
//    if( (float) reference == test )
//        return 0.0f;
//
// That check weeds out cases such as fabs(reference) > FLT_MAX; without it an
// infinite ulp error is returned here for otherwise correctly rounded
// results.
static float Ulp_Error_Half_Float(float test, double reference)
{
    union {
        double d;
        uint64_t u;
    } u;
    u.d = reference;

    double testVal = test;

    if (isinf(reference))
    {
        if (testVal == reference) return 0.0f;

        return (float)(testVal - reference);
    }

    if (isinf(testVal))
    {
        // Allow overflow within the limit of the allowed ulp error. Towards
        // that end we pretend the test value is actually 2**16, the next
        // value that would appear in the number line if half had sufficient
        // range.
        testVal = copysign(65536.0, testVal);
    }

    // A non-zero mantissa field means: not a power of two, or a NaN
    const int hasMantissaBits = (u.u & 0x000fffffffffffffULL) != 0;

    // If we are expecting a NaN, any NaN is fine
    if (hasMantissaBits && isnan(reference) && isnan(test)) return 0.0f;

    // Unbiased exponent that selects the ulp size
    int ref_exp = ilogb(reference);
    if (hasMantissaBits)
    {
        if (ref_exp < HALF_MIN_EXP - 1) ref_exp = HALF_MIN_EXP - 1;
    }
    else
    {
        // reference is a power of two or a zero: use the exponent one below.
        // Clamp BEFORE subtracting: ilogb(0) is FP_ILOGB0, which may be
        // INT_MIN, and "ilogb(reference) - 1" would then overflow.
        if (ref_exp < HALF_MIN_EXP) ref_exp = HALF_MIN_EXP;
        ref_exp -= 1;
    }

    // The unbiased exponent of the ulp unit place
    const int ulp_exp = HALF_MANT_DIG - 1 - ref_exp;

    // Scale the exponent of the error
    return (float)scalbn(testVal - reference, ulp_exp);
}
|
|
|
|
// Ulp error of a half-precision result: converts `test` to float with
// cl_half_to_float and passes it, together with `reference`, to
// Ulp_Error_Half_Float.
float Ulp_Error_Half(cl_half test, float reference)
{
    const float testAsFloat = cl_half_to_float(test);

    return Ulp_Error_Half_Float(testAsFloat, reference);
}
|
|
|
|
|
|
// Returns the signed difference (test - reference) expressed in units of the
// single-precision ulp at `reference`.
//
// test:      single-precision value under test.
// reference: expected value in double precision.
//
// Special cases:
//  - infinite reference: 0 when test equals it, otherwise the raw difference;
//  - infinite test with a finite reference: test is treated as +/-2**128;
//  - NaN reference and NaN test: 0;
//  - the ulp never gets smaller than the one used for exponents of
//    FLT_MIN_EXP - 1 and below.
//
// Note: this function presumes that the caller has already checked whether
// the result is correctly rounded, i.e.
//
//    if( (float) reference == test )
//        return 0.0f;
//
// That check weeds out cases such as fabs(reference) > FLT_MAX; without it an
// infinite ulp error is returned here for otherwise correctly rounded
// results.
float Ulp_Error(float test, double reference)
{
    union {
        double d;
        uint64_t u;
    } u;
    u.d = reference;
    double testVal = test;

    if (isinf(reference))
    {
        if (testVal == reference) return 0.0f;

        return (float)(testVal - reference);
    }

    if (isinf(testVal))
    {
        // Infinite test value, but finite (possibly overflowing in float)
        // reference.
        //
        // The function probably overflowed prematurely here. Formally, the
        // spec says this is an infinite ulp error and should not be
        // tolerated. Unfortunately, this would mean that the internal
        // precision of some half_pow implementations would have to be 29+
        // bits at half_powr( 0x1.fffffep+31, 4) to correctly determine that
        // 4*log2( 0x1.fffffep+31 ) is not exactly 128.0. You might represent
        // this for example as 4*(32 - ~2**-24), which after rounding to
        // single is 4*32 = 128, which will ultimately result in premature
        // overflow, even though a good faith representation would be correct
        // to within 2**-29 internally.
        //
        // In the interest of not requiring the implementation to go to
        // extraordinary lengths to deliver a half precision function, we
        // allow premature overflow within the limit of the allowed ulp
        // error. Towards that end, we "pretend" the test value is actually
        // 2**128, the next value that would appear in the number line if
        // float had sufficient range. scalbn builds that power of two
        // exactly, without needing a hexadecimal floating-point literal.
        testVal = copysign(scalbn(1.0, 128), testVal);

        // Note that the same hack may not work in long double, which is not
        // guaranteed to have more range than double. It is not clear that
        // premature overflow should be tolerated for double.
    }

    // A non-zero mantissa field means: not a power of two, or a NaN
    const int hasMantissaBits = (u.u & 0x000fffffffffffffULL) != 0;

    // If we are expecting a NaN, any NaN is fine
    if (hasMantissaBits && isnan(reference) && isnan(test)) return 0.0f;

    // Unbiased exponent that selects the ulp size
    int ref_exp = ilogb(reference);
    if (hasMantissaBits)
    {
        if (ref_exp < FLT_MIN_EXP - 1) ref_exp = FLT_MIN_EXP - 1;
    }
    else
    {
        // reference is a power of two or a zero: use the exponent one below.
        // Clamp BEFORE subtracting: ilogb(0) is FP_ILOGB0, which may be
        // INT_MIN, and "ilogb(reference) - 1" would then overflow.
        if (ref_exp < FLT_MIN_EXP) ref_exp = FLT_MIN_EXP;
        ref_exp -= 1;
    }

    // The unbiased exponent of the ulp unit place
    const int ulp_exp = FLT_MANT_DIG - 1 - ref_exp;

    // Scale the exponent of the error
    return (float)scalbn(testVal - reference, ulp_exp);
}
|
|
|
|
// Returns the signed difference (test - reference) expressed in units of the
// double-precision ulp at `reference`.
//
// Long double equal to double:
// On most systems long double is a higher precision type than double (an
// 80-bit or wider format, or a head-tail double double). That is enough to
// represent the reference to many more bits than double, so sub-ulp errors
// can be calculated. This is the standard system for which this test suite
// is designed.
//
// On systems where double and long double are the same type, the reference
// passed in can itself be off by as much as half a double precision ulp. In
// that case the reported error is inflated by half an ulp. A more correct
// and permanent fix would be to refactor the reference code to return
// results in this format:
//
//    typedef struct DoubleReference
//    {
//        // true value = correctlyRoundedResult + ulps *
//        // ulp(correctlyRoundedResult)        (infinitely precise)
//        // as best we can:
//        double correctlyRoundedResult;
//        // plus a fractional amount to account for the difference
//        // between infinitely precise result and correctlyRoundedResult,
//        // in units of ulps:
//        double ulps;
//    } DoubleReference;
//
// This would provide a higher-than-double precision format usable by
// everyone, and would solve a few problems with representing absolute errors
// below DBL_MIN and over DBL_MAX on systems that use a head to tail double
// double for long double.
//
// Note: this function presumes that the caller has already checked whether
// the result is correctly rounded, i.e.
//
//    if( (float) reference == test )
//        return 0.0f;
//
// That check weeds out cases such as fabs(reference) > FLT_MAX; without it an
// infinite ulp error is returned here for otherwise correctly rounded
// results.
float Ulp_Error_Double(double test, long double reference)
{
    const long double measured = test;

    // frexpl yields exactly 0.5 for positive powers of two. Everything else
    // (including zero, infinities, NaNs and negative values) takes the
    // general path.
    int unusedExponent;
    const int refIsPowerOfTwo = (frexpl(reference, &unusedExponent) == 0.5L);

    int ref_exp;
    if (refIsPowerOfTwo)
    {
        // Power of two: use the exponent one below the reference's own
        ref_exp = ilogbl(reference) - 1;
    }
    else
    {
        if (isinf(reference))
        {
            return (measured == reference) ? 0.0f
                                           : (float)(measured - reference);
        }

        // If we are expecting a NaN, any NaN is fine
        if (isnan(reference) && isnan(test)) return 0.0f;

        ref_exp = ilogbl(reference);
    }

    // The ulp never gets smaller than the one for exponent DBL_MIN_EXP - 1
    if (ref_exp < DBL_MIN_EXP - 1) ref_exp = DBL_MIN_EXP - 1;

    // The unbiased exponent of the ulp unit place
    const int ulp_exp = DBL_MANT_DIG - 1 - ref_exp;

    // Scale the exponent of the error
    float ulps = (float)scalbnl(measured - reference, ulp_exp);

    // Account for rounding error in the reference on systems that do not
    // have a higher precision floating point type (see above)
    if (sizeof(long double) == sizeof(double)) ulps += copysignf(0.5f, ulps);

    return ulps;
}
|
|
|
|
// Queries the build status of `program` for one device and, when the status
// is not CL_BUILD_SUCCESS, logs the status and the build log.
// Returns CL_SUCCESS, the failing OpenCL error code, or
// CL_OUT_OF_HOST_MEMORY when the log buffer cannot be allocated.
static cl_int OutputBuildLogForDevice(cl_program program, cl_device_id device)
{
    // Get the build status
    cl_build_status build_status;
    cl_int error = clGetProgramBuildInfo(
        program, device, CL_PROGRAM_BUILD_STATUS, sizeof(build_status),
        &build_status, NULL);
    if (error != CL_SUCCESS)
    {
        print_error(error, "Unable to query build status");
        return error;
    }

    if (build_status == CL_BUILD_SUCCESS) return CL_SUCCESS;

    // The build failed: log the status, then fetch, log and free the log
    log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int)build_status);

    size_t log_size = 0;
    error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0,
                                  NULL, &log_size);
    if (error != CL_SUCCESS)
    {
        print_error(error, "Unable to query build log size");
        return error;
    }

    // One extra byte so the text is terminated even if the reported size is
    // zero or the returned log lacks a terminator.
    char *build_log = (char *)malloc(log_size + 1);
    if (build_log == NULL)
    {
        print_error(CL_OUT_OF_HOST_MEMORY, "malloc failed");
        return CL_OUT_OF_HOST_MEMORY;
    }

    error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                  log_size, build_log, NULL);
    if (error != CL_SUCCESS)
    {
        print_error(error, "Unable to query build log");
    }
    else
    {
        build_log[log_size] = '\0';
        log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
    }

    free(build_log);
    return error;
}

// Logs the build status and build log of `program` for every device on which
// the build did not succeed.
//
// program:     program to inspect; when NULL nothing is done.
// num_devices: number of entries in device_list, or 0 to inspect all devices
//              of the program's context.
// device_list: devices to inspect; ignored when num_devices is 0.
//
// Returns CL_SUCCESS, the first failing OpenCL error code, or
// CL_OUT_OF_HOST_MEMORY when a temporary buffer cannot be allocated. The
// temporary device list is released on every path.
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices,
                       cl_device_id *device_list)
{
    // Does the program object exist?
    if (program == NULL) return CL_SUCCESS;

    // Device list owned by this function; stays NULL when the caller
    // supplied the devices.
    cl_device_id *queried_devices = NULL;

    if (num_devices == 0)
    {
        // No devices were specified: query the list from the context
        cl_context context;
        cl_int error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT,
                                        sizeof(context), &context, NULL);
        if (error != CL_SUCCESS)
        {
            print_error(error, "Unable to query program's context");
            return error;
        }

        size_t list_size = 0;
        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
                                 &list_size);
        if (error != CL_SUCCESS)
        {
            print_error(error, "Unable to query context's device size");
            return error;
        }

        queried_devices = (cl_device_id *)malloc(list_size);
        if (queried_devices == NULL)
        {
            print_error(CL_OUT_OF_HOST_MEMORY, "malloc failed");
            return CL_OUT_OF_HOST_MEMORY;
        }

        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, list_size,
                                 queried_devices, NULL);
        if (error != CL_SUCCESS)
        {
            print_error(error, "Unable to query context's devices");
            free(queried_devices);
            return error;
        }

        num_devices = (cl_uint)(list_size / sizeof(cl_device_id));
        device_list = queried_devices;
    }

    // For each device in the device list, stopping at the first failure
    cl_int result = CL_SUCCESS;
    for (cl_uint i = 0; i < num_devices && result == CL_SUCCESS; i++)
    {
        result = OutputBuildLogForDevice(program, device_list[i]);
    }

    // No-op when the caller supplied the device list
    free(queried_devices);

    return result;
}
|
|
|
|
// Subtest names that check_functions_for_offline_compiler matches against
// when the compilation mode is not online.
const char *subtests_to_skip_with_offline_compiler[] = {
    "get_kernel_arg_info", "binary_create", "load_program_source",
    "load_multistring_source", "load_two_kernel_source",
    "load_null_terminated_source", "load_null_terminated_multi_line_source",
    "load_null_terminated_partial_multi_line_source",
    "load_discreet_length_source", "get_program_source",
    "get_program_build_info", "options_build_optimizations",
    "options_build_macro", "options_build_macro_existence",
    "options_include_directory", "options_denorm_cache",
    "preprocessor_define_udef", "preprocessor_include",
    "preprocessor_line_error", "preprocessor_pragma",
    "compiler_defines_for_extensions", "image_macro",
    "simple_extern_compile_only", "simple_embedded_header_compile",
    "two_file_regular_variable_access", "two_file_regular_struct_access",
    "two_file_regular_function_access", "simple_embedded_header_link",
    "execute_after_simple_compile_and_link_with_defines",
    "execute_after_simple_compile_and_link_with_callbacks",
    "execute_after_embedded_header_link",
    "execute_after_included_header_link", "multi_file_libraries",
    "multiple_files", "multiple_libraries",
    "multiple_files_multiple_libraries", "multiple_embedded_headers",
    "program_binary_type", "compile_and_link_status_options_log",
    "kernel_preprocessor_macros", "execute_after_serialize_reload_library",
    "execute_after_serialize_reload_object",
    "execute_after_simple_compile_and_link",
    "execute_after_simple_compile_and_link_no_device_info",
    "execute_after_simple_library_with_link", "execute_after_two_file_link",
    "simple_compile_only", "simple_compile_with_callback",
    "simple_library_only", "simple_library_with_callback",
    "simple_library_with_link", "simple_link_only",
    "simple_link_with_callback", "simple_static_compile_only",
    "two_file_link", "async_build", "unload_repeated",
    "unload_compile_unload_link", "unload_build_unload_create_kernel",
    "unload_link_different", "unload_build_threaded", "unload_build_info",
    "unload_program_binaries", "features_macro", "progvar_prog_scope_misc",
    "library_function"
};
|
|
|
|
int check_functions_for_offline_compiler(const char *subtestname,
|
|
cl_device_id device)
|
|
{
|
|
if (gCompilationMode != kOnline)
|
|
{
|
|
int nNotRequiredWithOfflineCompiler =
|
|
sizeof(subtests_to_skip_with_offline_compiler) / sizeof(char *);
|
|
size_t i;
|
|
for (i = 0; i < nNotRequiredWithOfflineCompiler; ++i)
|
|
{
|
|
if (!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i]))
|
|
{
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|