mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 22:19:02 +00:00
The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
3087 lines
110 KiB
C++
3087 lines
110 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "image_helpers.h"
|
|
#include <math.h>
|
|
#include <limits.h>
|
|
#include <float.h>
|
|
#if defined( __APPLE__ )
|
|
#include <sys/mman.h>
|
|
#endif
|
|
|
|
|
|
// Global test counters. They are not referenced in the visible part of this
// file -- presumably updated by the individual image tests (TODO confirm).
int gTestCount = 0;
|
|
int gTestFailure = 0;
|
|
// Rounding mode that convert_float_to_half() dispatches on.
RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
|
|
|
|
// File-local float -> half converters, defined further down. The definitions
// omit `static` but keep internal linkage because of these declarations.
static cl_ushort float2half_rte( float f );
|
|
static cl_ushort float2half_rtz( float f );
|
|
|
|
|
|
// Translate a channel data type name into its cl_channel_type value.
// The name may be given with or without the leading "CL_" prefix
// (e.g. "CL_FLOAT" or "FLOAT"). Returns (cl_channel_type)-1 when no entry
// matches.
cl_channel_type get_channel_type_from_name( const char *name )
{
    struct {
        cl_channel_type type;
        const char *name;
    } typeNames[] = {
        { CL_SNORM_INT8, "CL_SNORM_INT8" },
        { CL_SNORM_INT16, "CL_SNORM_INT16" },
        { CL_UNORM_INT8, "CL_UNORM_INT8" },
        { CL_UNORM_INT16, "CL_UNORM_INT16" },
        { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
        { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
        { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
        { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
        { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
        { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
        { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" },
        { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
        { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
        { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
        { CL_FLOAT, "CL_FLOAT" },
#ifdef CL_SFIXED14_APPLE
        { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" }
#endif
    };

    const size_t entryCount = sizeof( typeNames ) / sizeof( typeNames[ 0 ] );
    size_t idx = 0;
    while( idx < entryCount )
    {
        const char *fullName = typeNames[ idx ].name;
        const char *shortName = fullName + 3; // same name without "CL_"

        if( strcmp( fullName, name ) == 0 )
            return typeNames[ idx ].type;
        if( strcmp( shortName, name ) == 0 )
            return typeNames[ idx ].type;

        idx++;
    }

    // Nothing matched
    return (cl_channel_type)-1;
}
|
|
|
|
// Translate a channel order name into its cl_channel_order value.
// Both the full spelling ("CL_RGBA") and the spelling without the "CL_"
// prefix ("RGBA") are accepted. Returns (cl_channel_order)-1 when the name
// is not in the table.
cl_channel_order get_channel_order_from_name( const char *name )
{
    const struct
    {
        cl_channel_order order;
        const char *name;
    } orderNames[] =
    {
        { CL_R, "CL_R" },
        { CL_A, "CL_A" },
        { CL_Rx, "CL_Rx" },
        { CL_RG, "CL_RG" },
        { CL_RA, "CL_RA" },
        { CL_RGx, "CL_RGx" },
        { CL_RGB, "CL_RGB" },
        { CL_RGBx, "CL_RGBx" },
        { CL_RGBA, "CL_RGBA" },
        { CL_BGRA, "CL_BGRA" },
        { CL_ARGB, "CL_ARGB" },
        { CL_INTENSITY, "CL_INTENSITY"},
        { CL_LUMINANCE, "CL_LUMINANCE"},
#ifdef CL_1RGB_APPLE
        { CL_1RGB_APPLE, "CL_1RGB_APPLE" },
#endif
#ifdef CL_BGR1_APPLE
        { CL_BGR1_APPLE, "CL_BGR1_APPLE" },
#endif
    };

    const size_t entryCount = sizeof( orderNames ) / sizeof( orderNames[ 0 ] );
    for( size_t idx = 0; idx != entryCount; ++idx )
    {
        const char *fullName = orderNames[ idx ].name;
        const bool matchesFull = ( strcmp( fullName, name ) == 0 );
        // "+ 3" steps over the "CL_" prefix
        if( matchesFull || strcmp( fullName + 3, name ) == 0 )
            return orderNames[ idx ].order;
    }

    return (cl_channel_order)-1;
}
|
|
|
|
int random_log_in_range( int minV, int maxV, MTdata d )
|
|
{
|
|
double v = log2( ( (double)genrand_int32(d) / (double)0x7fffffff ) + 1 );
|
|
int iv = (int)( (float)( maxV - minV ) * v );
|
|
return iv + minV;
|
|
}
|
|
|
|
|
|
// Define the addressing functions
|
|
// Signature of the integer addressing helpers: takes an unnormalized
// coordinate and the extent of that dimension and returns the coordinate
// to use.
typedef int (*AddressFn)( int value, size_t maxValue );
|
|
|
|
// Identity addressing: the coordinate is passed through untouched and the
// extent is ignored.
int NoAddressFn( int value, size_t maxValue )
{
    (void)maxValue;
    return value;
}
|
|
// Repeat addressing for unnormalized integer coordinates: a coordinate that
// falls off either end is shifted by exactly one extent. Only a single wrap
// is applied, so inputs more than one extent out of range stay out of range.
int RepeatAddressFn( int value, size_t maxValue )
{
    const int extent = (int)maxValue;

    if( value < 0 )
        return value + extent;
    if( value >= extent )
        return value - extent;
    return value;
}
|
|
// Mirrored-repeat addressing for integer coordinates. At this stage the
// coordinate is only pinned to the valid range [0, maxValue - 1].
int MirroredRepeatAddressFn( int value, size_t maxValue )
{
    if( value < 0 )
        return 0;

    return ( (size_t) value >= maxValue ) ? (int) (maxValue - 1) : value;
}
|
|
// Clamp addressing: coordinates are limited to [-1, maxValue], i.e. one
// texel beyond each edge is still representable.
int ClampAddressFn( int value, size_t maxValue )
{
    if( value < -1 )
        return -1;
    if( value > (cl_long) maxValue )
        return (int)maxValue;
    return value;
}
|
|
// Clamp-to-edge addressing: coordinates are limited to [0, maxValue - 1].
int ClampToEdgeNearestFn( int value, size_t maxValue )
{
    if( value < 0 )
        return 0;
    if( (size_t)value > maxValue - 1 )
        return (int)maxValue - 1;
    return value;
}
|
|
// Linear filtering uses the same clamp-to-edge rule as nearest filtering.
AddressFn ClampToEdgeLinearFn = ClampToEdgeNearestFn;
|
|
|
|
// Note: normalized coords get repeated in normalized space, not unnormalized space! hence the special case here
|
|
// Scratch location used to force an intermediate result out to a real float
// on x86 builds (see RepeatNormalizedAddressFn).
volatile float gFloatHome;

// Repeat addressing for a normalized coordinate: keep the fractional part
// of fValue (fValue - floor(fValue)) and scale it by the image extent.
//
// On x86 targets the fractional part is first stored to the volatile
// gFloatHome so it is rounded to float before the multiply.
// NOTE(review): the existing remark attributes this to the "VS compiler",
// but the guard tests __i386__/__x86_64__ -- confirm which compilers are
// meant to take this path.
float RepeatNormalizedAddressFn( float fValue, size_t maxValue )
{
    const float extent = (float) maxValue;

#if defined( __i386__ ) || defined( __x86_64__ )
    // Home the subtraction to a float to break up the sequence of x87
    // instructions.
    gFloatHome = fValue - floorf( fValue );
    return gFloatHome * extent;
#else
    // General computation for repeat: reduce to [0, 1.f], then un-normalize
    return ( fValue - floorf( fValue ) ) * extent;
#endif
}
|
|
|
|
// Mirrored-repeat addressing for a normalized coordinate.
//
// The coordinate is measured against the nearest even integer, which gives a
// value in [-1, 1]; taking the absolute value folds it into [0, 1], and the
// result is scaled by the image extent.
float MirroredRepeatNormalizedAddressFn( float fValue, size_t maxValue )
{
    // Nearest multiple of two. Exact halfway inputs may round either way
    // under round-to-nearest-even, but both choices mirror to the same place.
    const float nearestEven = 2.0f * rintf( fValue * 0.5f );

    // Distance from that multiple, folded into [0, 1]
    const float mirrored = fabsf( fValue - nearestEven );

    // Un-normalize
    return mirrored * (float) maxValue;
}
|
|
|
|
struct AddressingTable
|
|
{
|
|
AddressingTable()
|
|
{
|
|
ct_assert( ( CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6 ) );
|
|
ct_assert( CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2 );
|
|
|
|
mTable[ CL_ADDRESS_NONE - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ] = NoAddressFn;
|
|
mTable[ CL_ADDRESS_NONE - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ] = NoAddressFn;
|
|
mTable[ CL_ADDRESS_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ] = RepeatAddressFn;
|
|
mTable[ CL_ADDRESS_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ] = RepeatAddressFn;
|
|
mTable[ CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ] = ClampToEdgeNearestFn;
|
|
mTable[ CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ] = ClampToEdgeLinearFn;
|
|
mTable[ CL_ADDRESS_CLAMP - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ] = ClampAddressFn;
|
|
mTable[ CL_ADDRESS_CLAMP - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ] = ClampAddressFn;
|
|
mTable[ CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ] = MirroredRepeatAddressFn;
|
|
mTable[ CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ] = MirroredRepeatAddressFn;
|
|
}
|
|
|
|
AddressFn operator[]( image_sampler_data *sampler )
|
|
{
|
|
return mTable[ (int)sampler->addressing_mode - CL_ADDRESS_NONE ][ (int)sampler->filter_mode - CL_FILTER_NEAREST ];
|
|
}
|
|
|
|
AddressFn mTable[ 6 ][ 2 ];
|
|
};
|
|
|
|
static AddressingTable sAddressingTable;
|
|
|
|
|
|
// Format helpers
|
|
|
|
// Report whether the format's channel order is treated as carrying an alpha
// component. Returns 1 if so and 0 otherwise; an unrecognised channel order
// is logged and reported as 0.
int has_alpha(cl_image_format *format) {
    switch (format->image_channel_order) {
        // Orders reported as having no alpha
        case CL_R:
        case CL_RG:
        case CL_RGB:
        case CL_LUMINANCE:
            return 0;

        // Orders reported as having alpha
        case CL_A:
        case CL_Rx:
        case CL_RA:
        case CL_RGx:
        case CL_RGBx:
        case CL_RGBA:
        case CL_BGRA:
        case CL_ARGB:
        case CL_INTENSITY:
#ifdef CL_BGR1_APPLE
        case CL_BGR1_APPLE:
#endif
#ifdef CL_1RGB_APPLE
        case CL_1RGB_APPLE:
#endif
            return 1;

        default:
            log_error("Invalid image channel order: %d\n", format->image_channel_order);
            return 0;
    }
}
|
|
|
|
// Not referenced in the visible part of this file.
#define PRINT_MAX_SIZE_LOGIC 0
|
|
|
|
// XOR-swap of two integer lvalues. Each argument is evaluated several times,
// and passing the same object for both arguments leaves it zero.
#define SWAP( _a, _b ) do{ _a ^= _b; _b ^= _a; _a ^= _b; }while(0)
|
|
// Larger of two values; each argument may be evaluated twice.
#ifndef MAX
|
|
#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
|
|
#endif
|
|
|
|
void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
|
|
size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
|
|
const cl_ulong maxIndividualAllocSize, // CL_DEVICE_MAX_MEM_ALLOC_SIZE
|
|
const cl_ulong maxTotalAllocSize, // CL_DEVICE_GLOBAL_MEM_SIZE
|
|
cl_mem_object_type image_type, cl_image_format *format) {
|
|
|
|
bool is3D = (image_type == CL_MEM_OBJECT_IMAGE3D);
|
|
bool isArray = (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY);
|
|
|
|
// Validate we have a reasonable max depth for 3D
|
|
if (is3D && maxDepth < 2) {
|
|
log_error("ERROR: Requesting max image sizes for 3D images when max depth is < 2.\n");
|
|
*numberOfSizes = 0;
|
|
return;
|
|
}
|
|
// Validate we have a reasonable max array size for 1D & 2D image arrays
|
|
if (isArray && maxArraySize < 2) {
|
|
log_error("ERROR: Requesting max image sizes for an image array when max array size is < 1.\n");
|
|
*numberOfSizes = 0;
|
|
return;
|
|
}
|
|
|
|
// Reduce the maximum because we are trying to test the max image dimensions, not the memory allocation
|
|
cl_ulong adjustedMaxTotalAllocSize = maxTotalAllocSize / 4;
|
|
cl_ulong adjustedMaxIndividualAllocSize = maxIndividualAllocSize / 4;
|
|
log_info("Note: max individual allocation adjusted down from %gMB to %gMB and max total allocation adjusted down from %gMB to %gMB.\n",
|
|
maxIndividualAllocSize/(1024.0*1024.0), adjustedMaxIndividualAllocSize/(1024.0*1024.0),
|
|
maxTotalAllocSize/(1024.0*1024.0), adjustedMaxTotalAllocSize/(1024.0*1024.0));
|
|
|
|
// Cap our max allocation to 1.5GB.
|
|
// FIXME -- why? In the interest of not taking a long time? We should still test this stuff...
|
|
if (adjustedMaxTotalAllocSize > (cl_ulong)2048*1024*1024) {
|
|
adjustedMaxTotalAllocSize = (cl_ulong)2048*1024*1024;
|
|
log_info("Limiting max total allocation size to %gMB (down from %gMB) for test.\n",
|
|
adjustedMaxTotalAllocSize/(1024.0*1024.0), maxTotalAllocSize/(1024.0*1024.0));
|
|
}
|
|
|
|
cl_ulong maxAllocSize = adjustedMaxIndividualAllocSize;
|
|
if (adjustedMaxTotalAllocSize < adjustedMaxIndividualAllocSize*2)
|
|
maxAllocSize = adjustedMaxTotalAllocSize/2;
|
|
|
|
size_t raw_pixel_size = get_pixel_size(format);
|
|
size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size;
|
|
|
|
log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, per-allocation limit %gMB.\n",
|
|
maxWidth, maxHeight, isArray ? maxArraySize : maxDepth, raw_pixel_size, (maxAllocSize/(1024.0*1024.0)));
|
|
|
|
// Keep track of the maximum sizes for each dimension
|
|
size_t maximum_sizes[] = { maxWidth, maxHeight, maxDepth };
|
|
|
|
switch (image_type) {
|
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
|
maximum_sizes[1] = maxArraySize;
|
|
maximum_sizes[2] = 1;
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
|
maximum_sizes[2] = maxArraySize;
|
|
break;
|
|
}
|
|
|
|
|
|
// Given one fixed sized dimension, this code finds one or two other dimensions,
|
|
// both with very small size, such that the size does not exceed the maximum
|
|
// passed to this function
|
|
|
|
size_t other_sizes[] = { 2, 7, 13, 18, 21, 29, 33, 36 };
|
|
static size_t other_size = 0;
|
|
enum { num_other_sizes = sizeof(other_sizes)/sizeof(size_t) };
|
|
|
|
(*numberOfSizes) = 0;
|
|
|
|
if (image_type == CL_MEM_OBJECT_IMAGE1D) {
|
|
|
|
double M = maximum_sizes[0];
|
|
|
|
// Store the size
|
|
sizes[(*numberOfSizes)][0] = (size_t)M;
|
|
sizes[(*numberOfSizes)][1] = 1;
|
|
sizes[(*numberOfSizes)][2] = 1;
|
|
++(*numberOfSizes);
|
|
}
|
|
|
|
else if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D) {
|
|
|
|
for (int fixed_dim=0;fixed_dim<2;++fixed_dim) {
|
|
|
|
// Determine the size of the fixed dimension
|
|
double M = maximum_sizes[fixed_dim];
|
|
double A = max_pixels;
|
|
|
|
int x0_dim = !fixed_dim;
|
|
double x0 = fmin(fmin(other_sizes[(other_size++)%num_other_sizes],A/M), maximum_sizes[x0_dim]);
|
|
|
|
// Store the size
|
|
sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
|
|
sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
|
|
sizes[(*numberOfSizes)][2] = 1;
|
|
++(*numberOfSizes);
|
|
}
|
|
}
|
|
|
|
else if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE3D) {
|
|
|
|
// Iterate over dimensions, finding sizes for the non-fixed dimension
|
|
for (int fixed_dim=0;fixed_dim<3;++fixed_dim) {
|
|
|
|
// Determine the size of the fixed dimension
|
|
double M = maximum_sizes[fixed_dim];
|
|
double A = max_pixels;
|
|
|
|
// Find two other dimensions, x0 and x1
|
|
int x0_dim = (fixed_dim == 0) ? 1 : 0;
|
|
int x1_dim = (fixed_dim == 2) ? 1 : 2;
|
|
|
|
// Choose two other sizes for these dimensions
|
|
double x0 = fmin(fmin(A/M,maximum_sizes[x0_dim]),other_sizes[(other_size++)%num_other_sizes]);
|
|
// GPUs have certain restrictions on minimum width (row alignment) of images which has given us issues
|
|
// testing small widths in this test (say we set width to 3 for testing, and compute size based on this width and decide
|
|
// it fits within vram ... but GPU driver decides that, due to row alignment requirements, it has to use
|
|
// width of 16 which doesnt fit in vram). For this purpose we are not testing width < 16 for this test.
|
|
if(x0_dim == 0 && x0 < 16)
|
|
x0 = 16;
|
|
double x1 = fmin(fmin(A/M/x0,maximum_sizes[x1_dim]),other_sizes[(other_size++)%num_other_sizes]);
|
|
// Store the size
|
|
sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
|
|
sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
|
|
sizes[(*numberOfSizes)][x1_dim] = (size_t)x1;
|
|
++(*numberOfSizes);
|
|
}
|
|
}
|
|
|
|
// Log the results
|
|
for (int j=0; j<(int)(*numberOfSizes); j++) {
|
|
switch (image_type) {
|
|
case CL_MEM_OBJECT_IMAGE1D:
|
|
log_info(" size[%d] = [%ld] (%g MB image)\n",
|
|
j, sizes[j][0], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
|
case CL_MEM_OBJECT_IMAGE2D:
|
|
log_info(" size[%d] = [%ld %ld] (%g MB image)\n",
|
|
j, sizes[j][0], sizes[j][1], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
|
case CL_MEM_OBJECT_IMAGE3D:
|
|
log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n",
|
|
j, sizes[j][0], sizes[j][1], sizes[j][2], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Absolute error allowed when comparing a sampled value against the
// reference. Nearest filtering allows none; otherwise the allowance depends
// on the channel data type, and types not listed below get 0.
float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) {
    if (sampler->filter_mode == CL_FILTER_NEAREST)
        return 0.0f;

    float tolerance = 0.0f;
    switch (format->image_channel_data_type) {
        // Normalized integer types: one step of the stored integer
        case CL_SNORM_INT8:
            tolerance = 1.0f/127.0f;
            break;
        case CL_UNORM_INT8:
            tolerance = 1.0f/255.0f;
            break;
        case CL_UNORM_INT16:
            tolerance = 1.0f/65535.0f;
            break;
        case CL_SNORM_INT16:
            tolerance = 1.0f/32767.0f;
            break;
        case CL_FLOAT:
            tolerance = CL_FLT_MIN;
            break;
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
            tolerance = 0x1.0p-14f;
            break;
#endif
        default:
            break;
    }
    return tolerance;
}
|
|
|
|
// Relative error allowed when comparing a sampled value against the
// reference.
//
//   format         - image format; its channel data type selects the
//                    conversion error allowance
//   sampler        - sampler state; addressing mode, filter mode and
//                    normalized coordinates decide whether a sampling
//                    allowance applies
//   is3D           - non-zero when the image is 3D (8 samples per linear
//                    fetch instead of 4)
//   isLinearFilter - non-zero when linear filtering is in use
float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter )
{
    float maxError = 0.0f;

    // Number of texels contributing to one result
    const float sampleCount = isLinearFilter ? ( is3D ? 8.0f : 4.0f ) : 1.0f;

    // Note that the ULP is defined here as the unit in the last place of the maximum
    // magnitude sample used for filtering.

    // Section 8.3
    switch( format->image_channel_data_type )
    {
        // The spec allows 2 ulps of error for normalized formats
        case CL_SNORM_INT8:
        case CL_UNORM_INT8:
        case CL_SNORM_INT16:
        case CL_UNORM_INT16:
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
        case CL_UNORM_INT_101010:
            // Maximum sampling error for round to zero normalization based on multiplication
            // by reciprocal (using reciprocal generated in round to +inf mode, so that 1.0 matches spec)
            maxError = 2*FLT_EPSILON*sampleCount;
            break;

        // If the implementation supports these formats then it will have to allow rounding error here too,
        // because not all 32-bit ints are exactly representable in float
        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT32:
            maxError = 1*FLT_EPSILON;
            break;

        default:
            break;
    }

    // Section 8.2
    const bool samplingMayBeInexact =
        sampler->addressing_mode == CL_ADDRESS_REPEAT
        || sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT
        || sampler->filter_mode != CL_FILTER_NEAREST
        || sampler->normalized_coords;

    if( samplingMayBeInexact )
#if defined( __APPLE__ )
    {
        if( sampler->filter_mode == CL_FILTER_NEAREST )
        {
            // normalized coordinates will introduce some error into the fractional part of the address, affecting results
            maxError += 4.0f * FLT_EPSILON;
        }
        else
        {
            extern cl_device_type gDeviceType;
            if( gDeviceType == CL_DEVICE_TYPE_GPU )
                maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4); // Some GPUs ain't so accurate
            else
                // The standard method of 2d linear filtering delivers 4.0 ulps of error in round to nearest (8 in rtz).
                maxError += 4.0f * FLT_EPSILON;
        }
    }
#else
    {
#if !defined(_WIN32)
#warning Implementations will likely wish to pick a max allowable sampling error policy here that is better than the spec
#endif
        // The spec allows linear filters to return any result most of the time.
        // That's fine for implementations but a problem for testing, so a fixed
        // tolerance has been picked here. Note that this assignment replaces
        // (rather than adds to) the format allowance computed above.
        // Implementations are free to pick another positive number, up to
        // INFINITY, which is what the spec allows. (NaN won't work.)
        maxError = MAKE_HEX_FLOAT(0x1.0p-7f, 0x1L, -7);
    }
#endif

    // The error calculation itself can introduce error
    maxError += FLT_EPSILON * 2;

    return maxError;
}
|
|
|
|
// Upper bound this file uses for the integer range of a channel data type.
// Types not listed yield 0.
size_t get_format_max_int( cl_image_format *format )
{
    size_t upperBound = 0;

    switch( format->image_channel_data_type )
    {
        case CL_SNORM_INT8:
        case CL_SIGNED_INT8:
            upperBound = 127;
            break;

        case CL_UNORM_INT8:
        case CL_UNSIGNED_INT8:
            upperBound = 255;
            break;

        case CL_SNORM_INT16:
        case CL_SIGNED_INT16:
            upperBound = 32767;
            break;

        case CL_UNORM_INT16:
        case CL_UNSIGNED_INT16:
            upperBound = 65535;
            break;

        case CL_SIGNED_INT32:
            upperBound = 2147483647L;
            break;

        case CL_UNSIGNED_INT32:
            upperBound = 4294967295LL;
            break;

        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
            upperBound = 31;
            break;

        case CL_UNORM_INT_101010:
            upperBound = 1023;
            break;

        case CL_HALF_FLOAT:
            upperBound = 1<<10;
            break;

#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
            upperBound = 16384;
            break;
#endif

        default:
            break;
    }

    return upperBound;
}
|
|
|
|
// Lower bound this file uses for the integer range of a channel data type.
// Unsigned and unlisted types yield 0.
int get_format_min_int( cl_image_format *format )
{
    switch( format->image_channel_data_type )
    {
        case CL_SNORM_INT8:
        case CL_SIGNED_INT8:
            return -128;

        case CL_UNORM_INT8:
        case CL_UNSIGNED_INT8:
            return 0;

        case CL_SNORM_INT16:
        case CL_SIGNED_INT16:
            return -32768;

        case CL_UNORM_INT16:
        case CL_UNSIGNED_INT16:
            return 0;

        case CL_SIGNED_INT32:
            return -2147483648LL;

        case CL_UNSIGNED_INT32:
            return 0;

        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
        case CL_UNORM_INT_101010:
            return 0;

        case CL_HALF_FLOAT:
            // Negate after shifting: "-1<<10" left-shifts a negative value,
            // which is undefined behaviour. The value is still -1024.
            return -( 1 << 10 );

#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
            return -16384;
#endif

        default:
            return 0;
    }
}
|
|
|
|
// Convert an IEEE 754 half-precision bit pattern to a float.
//
// Zeros, denormals, infinities and NaNs are all handled: denormals are
// renormalized, and infinities/NaNs keep their mantissa payload (shifted
// into the float's mantissa).
//
// All bit manipulation is done on unsigned values; shifting the sign into
// bit 31 of a signed int would overflow.
float convert_half_to_float( unsigned short halfValue )
{
    // Split the half into its three fields
    unsigned int sign     = ( halfValue >> 15 ) & 0x0001u;
    int          exponent = ( halfValue >> 10 ) & 0x001f;
    unsigned int mantissa = ( halfValue )       & 0x03ffu;

    // A union gives direct access to the bits of the resulting float
    union
    {
        unsigned int bits;
        float floatValue;
    } outFloat;

    if( exponent == 0 )
    {
        if( mantissa == 0 )
        {
            // Exponent and mantissa both 0: the number is +/- 0
            outFloat.bits = sign << 31;
            return outFloat.floatValue;
        }

        // Denormalized half: shift the mantissa up until its leading one
        // reaches the implicit-bit position, adjusting the exponent as we go
        while( ( mantissa & 0x00000400u ) == 0 )
        {
            mantissa <<= 1;
            exponent--;
        }

        // The leading bit is implicit in a normalized number, so drop it
        // and bump the exponent accordingly
        exponent++;
        mantissa &= ~0x00000400u;
    }
    else if( exponent == 31 )
    {
        // +/- infinity (mantissa == 0) or NaN (mantissa != 0); both pack
        // the same way
        outFloat.bits = ( sign << 31 ) | 0x7f800000u | ( mantissa << 13 );
        return outFloat.floatValue;
    }

    // Normalized number: re-bias the exponent (half bias 15 -> float bias
    // 127) and widen the mantissa from 10 to 23 bits. The exponent is
    // always positive at this point.
    exponent += ( 127 - 15 );
    mantissa <<= 13;

    outFloat.bits = ( sign << 31 ) | ( (unsigned int)exponent << 23 ) | mantissa;
    return outFloat.floatValue;
}
|
|
|
|
|
|
|
|
// Convert a float to a half-precision bit pattern using the rounding mode
// currently selected by gFloatToHalfRoundingMode. An unsupported mode is a
// test-internal error: it is logged and the process exits.
cl_ushort convert_float_to_half( float f )
{
    if( gFloatToHalfRoundingMode == kRoundToNearestEven )
        return float2half_rte( f );

    if( gFloatToHalfRoundingMode == kRoundTowardZero )
        return float2half_rtz( f );

    log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" );
    exit(-1);
    return 0xffff;
}
|
|
|
|
// Convert a float to a half-precision bit pattern, rounding to nearest even.
// NaNs are returned quieted with their sign and upper payload bits kept,
// values too large for half become infinity, and values too small become a
// signed zero.
cl_ushort float2half_rte( float f )
{
    union{ float f; cl_uint u; } pun = {f};
    const cl_uint signBit = (pun.u >> 16) & 0x8000;
    float magnitude = fabsf(f);

    // NaN: move the top of the payload into place and set the quiet bit
    if( magnitude != magnitude )
    {
        cl_uint nanBits = ( pun.u >> (24-11) ) & 0x7fff;
        nanBits |= 0x0200; // silence the NaN
        return nanBits | signBit;
    }

    // Overflow: rounds to infinity
    if( magnitude >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
        return 0x7c00 | signBit;

    // Underflow: at or below half of the smallest denormal. The exact
    // halfway case could go to 0x0001 or 0; 0 is even.
    if( magnitude <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
        return signBit;

    // Very small: rounds to the smallest denormal
    if( magnitude < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
        return signBit | 1;

    // Half denormal: scaling into the float denormal range makes the float
    // multiply do the rounding, leaving the half mantissa in the low bits
    if( magnitude < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
    {
        pun.f = magnitude * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
        return signBit | pun.u;
    }

    // Normal range: build a power of two 2^13 times the input's own power of
    // two, then add and subtract it so the float addition rounds away the
    // 13 mantissa bits a half cannot hold.
    pun.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
    pun.u &= 0x7f800000;
    magnitude += pun.f;
    pun.f = magnitude - pun.f;

    // Re-bias the exponent from float to half, then shift the fields down
    pun.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);

    return (pun.u >> (24-11)) | signBit;
}
|
|
|
|
// Convert a float to a half-precision bit pattern, rounding toward zero.
// NaNs are returned quieted, infinity stays infinity, and finite values too
// large for half become the largest finite half.
cl_ushort float2half_rtz( float f )
{
    union{ float f; cl_uint u; } pun = {f};
    const cl_uint signBit = (pun.u >> 16) & 0x8000;
    float magnitude = fabsf(f);

    // NaN: move the top of the payload into place and set the quiet bit
    if( magnitude != magnitude )
    {
        cl_uint nanBits = ( pun.u >> (24-11) ) & 0x7fff;
        nanBits |= 0x0200; // silence the NaN
        return nanBits | signBit;
    }

    // Overflow: only a true infinity maps to infinity; every other value
    // truncates to the largest finite half
    if( magnitude >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
        return ( magnitude == INFINITY ? 0x7c00 : 0x7bff ) | signBit;

    // Underflow: anything below the smallest denormal truncates to zero
    if( magnitude < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
        return signBit;

    // Half denormal: scale so the integer part is the half mantissa, then
    // truncate
    if( magnitude < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
    {
        magnitude *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
        return (cl_ushort)((int) magnitude | signBit);
    }

    // Normal range: drop the 13 low mantissa bits and re-bias the exponent
    // (0x38000000 is 112 << 23). The float's own sign bit ends up above
    // bit 15 after the shift and is discarded by the cl_ushort return type.
    pun.u &= 0xFFFFE000U;
    pun.u -= 0x38000000U;

    return (pun.u >> (24-11)) | signBit;
}
|
|
|
|
// Helper whose constructor holds a self-test comparing convert_float_to_half()
// against __vstore_half_rte(). The constructor currently returns immediately.
class TEST
|
|
{
|
|
public:
|
|
TEST();
|
|
};
|
|
|
|
// File-scope instance: its constructor runs during static initialization.
static TEST t;
|
|
void __vstore_half_rte(float f, size_t index, uint16_t *p)
|
|
{
|
|
union{ unsigned int u; float f;} u;
|
|
|
|
u.f = f;
|
|
unsigned short r = (u.u >> 16) & 0x8000;
|
|
u.u &= 0x7fffffff;
|
|
if( u.u >= 0x33000000U )
|
|
{
|
|
if( u.u >= 0x47800000 )
|
|
{
|
|
if( u.u <= 0x7f800000 )
|
|
r |= 0x7c00;
|
|
else
|
|
{
|
|
r |= 0x7e00 | ( (u.u >> 13) & 0x3ff );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
float x = u.f;
|
|
if( u.u < 0x38800000 )
|
|
u.u = 0x3f000000;
|
|
else
|
|
u.u += 0x06800000;
|
|
u.u &= 0x7f800000U;
|
|
x += u.f;
|
|
x -= u.f;
|
|
u.f = x * MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
|
|
u.u >>= 13;
|
|
r |= (unsigned short) u.u;
|
|
}
|
|
}
|
|
|
|
((unsigned short*)p)[index] = r;
|
|
}
|
|
|
|
// Optional exhaustive self-test of the float -> half conversion.
//
// By default this constructor does nothing, as before. Building with
// TEST_HALF_CONVERSIONS defined makes it walk every 32-bit float bit pattern
// and report each one where convert_float_to_half() disagrees with
// __vstore_half_rte().
// The check used to sit after an unconditional "return;", where it was
// unreachable and could not be switched on without editing the code.
// NOTE(review): the check runs during static initialization, so it compares
// against whatever gFloatToHalfRoundingMode holds at that point -- confirm
// that is a mode convert_float_to_half() accepts before enabling it.
TEST::TEST()
{
#if defined( TEST_HALF_CONVERSIONS )
    union
    {
        float f;
        uint32_t i;
    } test;
    uint16_t control, myval;

    log_info(" &&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING HALFS &&&&&&&&&&&&&&&&&&&&\n" );
    test.i = 0;
    do
    {
        // Progress marker every 2^24 values, with a '*' every 2^28
        if( ( test.i & 0xffffff ) == 0 )
        {
            if( ( test.i & 0xfffffff ) == 0 )
                log_info( "*" );
            else
                log_info( "." );
            fflush(stdout);
        }
        __vstore_half_rte( test.f, 0, &control );
        myval = convert_float_to_half( test.f );
        if( myval != control )
        {
            log_info( "\n******** ERROR: MyVal %04x control %04x source %12.24f\n", myval, control, test.f );
            log_info( " source bits: %08x %a\n", test.i, test.f );
            // Named so as not to shadow the file-scope TEST instance "t"
            float controlAsFloat = convert_half_to_float( control );
            float myvalAsFloat = convert_half_to_float( myval );
            log_info( " converted control: %12.24f myval: %12.24f\n", controlAsFloat, myvalAsFloat );
        }
        test.i++;
    } while( test.i != 0 );   // stops when the counter wraps back to 0
    log_info("\n &&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING HALFS &&&&&&&&&&&&&&&&&&&&\n" );
#endif
}
|
|
|
|
// Defined elsewhere. When set, generate_random_image_data() fills the image
// with an incrementing ramp instead of random bits.
extern bool gTestRounding;
|
|
// First value of that ramp.
uint64_t gRoundingStartValue = 0;
|
|
|
|
// Allocate and fill a host buffer for the image described by imageInfo.
//
//   imageInfo - image type, dimensions, pitches and format
//   P         - receives ownership of the allocation
//   d         - random number generator state
//
// Returns the start of the image data, or 0 if the image type is unknown or
// the allocation fails.
//
// Normally the buffer is filled with random bits, patterns that would read
// as float or half infinity/NaN are flipped to finite values, and any row or
// slice padding is set to 0xff. When gTestRounding is set the buffer instead
// holds an incrementing ramp starting at gRoundingStartValue.
char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d )
{
    size_t allocSize;
    size_t pixelRowBytes = imageInfo->width * get_pixel_size( imageInfo->format );
    size_t i;

    switch (imageInfo->type)
    {
        case CL_MEM_OBJECT_IMAGE1D:
            allocSize = imageInfo->rowPitch;
            break;
        case CL_MEM_OBJECT_IMAGE2D:
            allocSize = imageInfo->height * imageInfo->rowPitch;
            break;
        case CL_MEM_OBJECT_IMAGE3D:
            allocSize = imageInfo->depth * imageInfo->slicePitch;
            break;
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
            allocSize = imageInfo->arraySize * imageInfo->slicePitch;
            break;
        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
            allocSize = imageInfo->arraySize * imageInfo->slicePitch;
            break;
        default:
            log_error("Cannot identify image type %x", imageInfo->type);
            return 0;
    }

#if defined (__APPLE__ )
    char *data = NULL;
    if (gDeviceType == CL_DEVICE_TYPE_CPU) {
        // Place the image between two inaccessible guard pages, flush
        // against the upper one
        size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192;

        void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
        if (map == MAP_FAILED) {
            // Without this check the failure value would be used as a base address
            log_error( "ERROR: Unable to mmap %zu bytes for generate_random_image_data\n", mapSize );
            return 0;
        }
        intptr_t data_end = (intptr_t)map + mapSize - 4096;
        data = (char *)(data_end - (intptr_t)allocSize);

        mprotect(map, 4096, PROT_NONE);
        mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
        P.reset(data, map, mapSize,allocSize);
    } else {
        data = (char *)malloc(allocSize);
        P.reset(data,NULL,0,allocSize);
    }
#else
    char *data = (char *)malloc(allocSize);
    P.reset(data,NULL,0,allocSize);
#endif

    if (data == NULL) {
        log_error( "ERROR: Unable to malloc %zu bytes for generate_random_image_data\n", allocSize );
        return 0;
    }

    if( gTestRounding )
    {
        // Special case: fill with a ramp from 0 to the size of the type
        size_t typeSize = get_format_type_size( imageInfo->format );
        switch( typeSize )
        {
            case 1:
            {
                char *ptr = data;
                for( i = 0; i < allocSize; i++ )
                    ptr[i] = (cl_char) (i + gRoundingStartValue);
            }
                break;
            case 2:
            {
                cl_short *ptr = (cl_short*) data;
                for( i = 0; i < allocSize / 2; i++ )
                    ptr[i] = (cl_short) (i + gRoundingStartValue);
            }
                break;
            case 4:
            {
                cl_int *ptr = (cl_int*) data;
                // Four bytes per element: the bound must be allocSize / 4.
                // It used to be allocSize / 2, which wrote twice the
                // allocation.
                for( i = 0; i < allocSize / 4; i++ )
                    ptr[i] = (cl_int) (i + gRoundingStartValue);
            }
                break;
            default:
                // Other element sizes get no ramp
                break;
        }
        return data;
    }

    // Otherwise, we should be able to just fill with random bits no matter what
    cl_uint *p = (cl_uint*) data;
    for( i = 0; i + 4 <= allocSize; i += 4 )
        p[ i / 4 ] = genrand_int32(d);

    // Any bytes left over after the last whole 32-bit word
    for( ; i < allocSize; i++ )
        data[i] = genrand_int32(d);

    // Note: inf or nan float values would cause problems, although we don't know this will
    // actually be a float, so we just know what to look for
    unsigned int *intPtr = (unsigned int *)data;
    for( i = 0; i < allocSize >> 2; i++ )
    {
        if( ( intPtr[ i ] & 0x7F800000 ) == 0x7F800000 )
            intPtr[ i ] ^= 0x40000000;
    }

    // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits = 0x7C00 are special)
    unsigned short *shortPtr = (unsigned short *)data;
    for( i = 0; i < allocSize >> 1; i++ )
    {
        if( ( shortPtr[ i ] & 0x7C00 ) == 0x7C00 )
            shortPtr[ i ] ^= 0x4000;
    }

    // Fill unused edges with -1, NaN for float
    if (imageInfo->rowPitch > pixelRowBytes)
    {
        // Number of rows that carry padding; stays 0 for types not listed
        size_t height = 0;

        switch (imageInfo->type)
        {
            case CL_MEM_OBJECT_IMAGE2D:
            case CL_MEM_OBJECT_IMAGE3D:
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                height = imageInfo->height;
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                height = imageInfo->arraySize;
                break;
            default:
                break;
        }

        // Fill in the row padding regions
        for( i = 0; i < height; i++ )
        {
            size_t offset = i * imageInfo->rowPitch + pixelRowBytes;
            size_t length = imageInfo->rowPitch - pixelRowBytes;
            memset( data + offset, 0xff, length );
        }
    }

    // Fill in the slice padding regions, if necessary:

    size_t slice_dimension = imageInfo->height;
    if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
        slice_dimension = imageInfo->arraySize;
    }

    if (imageInfo->slicePitch > slice_dimension*imageInfo->rowPitch)
    {
        // Number of slices that carry padding; stays 0 for types not listed
        size_t depth = 0;
        switch (imageInfo->type)
        {
            case CL_MEM_OBJECT_IMAGE2D:
            case CL_MEM_OBJECT_IMAGE3D:
                depth = imageInfo->depth;
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                depth = imageInfo->arraySize;
                break;
            default:
                break;
        }

        for( i = 0; i < depth; i++ )
        {
            size_t offset = i * imageInfo->slicePitch + slice_dimension*imageInfo->rowPitch;
            size_t length = imageInfo->slicePitch - slice_dimension*imageInfo->rowPitch;
            memset( data + offset, 0xff, length );
        }
    }

    return data;
}
|
|
|
|
// Clamp a float to the range [-1, 1].
#define CLAMP_FLOAT( v ) ( fmaxf( fminf( v, 1.f ), -1.f ) )
|
|
|
|
|
|
void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
|
|
int x, int y, int z, float *outData )
|
|
{
|
|
if ( x < 0 || y < 0 || z < 0 || x >= (int)imageInfo->width
|
|
|| ( imageInfo->height != 0 && y >= (int)imageInfo->height )
|
|
|| ( imageInfo->depth != 0 && z >= (int)imageInfo->depth )
|
|
|| ( imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize ) )
|
|
{
|
|
// Border color
|
|
outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
|
|
if (!has_alpha(imageInfo->format))
|
|
outData[3] = 1;
|
|
return;
|
|
}
|
|
|
|
cl_image_format *format = imageInfo->format;
|
|
|
|
unsigned int i;
|
|
float tempData[ 4 ];
|
|
|
|
// Advance to the right spot
|
|
char *ptr = (char *)imageData;
|
|
size_t pixelSize = get_pixel_size( format );
|
|
|
|
ptr += z * imageInfo->slicePitch + y * imageInfo->rowPitch + x * pixelSize;
|
|
|
|
// OpenCL only supports reading floats from certain formats
|
|
size_t channelCount = get_format_channel_count( format );
|
|
switch( format->image_channel_data_type )
|
|
{
|
|
case CL_SNORM_INT8:
|
|
{
|
|
char *dPtr = (char *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = CLAMP_FLOAT( (float)dPtr[ i ] / 127.0f );
|
|
break;
|
|
}
|
|
|
|
case CL_UNORM_INT8:
|
|
{
|
|
unsigned char *dPtr = (unsigned char *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = (float)dPtr[ i ] / 255.0f;
|
|
break;
|
|
}
|
|
|
|
case CL_SIGNED_INT8:
|
|
{
|
|
cl_char *dPtr = (cl_char *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = (float)dPtr[ i ];
|
|
break;
|
|
}
|
|
|
|
case CL_UNSIGNED_INT8:
|
|
{
|
|
cl_uchar *dPtr = (cl_uchar *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = (float) dPtr[ i ];
|
|
break;
|
|
}
|
|
|
|
case CL_SNORM_INT16:
|
|
{
|
|
cl_short *dPtr = (cl_short *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = CLAMP_FLOAT( (float)dPtr[ i ] / 32767.0f );
|
|
break;
|
|
}
|
|
|
|
case CL_UNORM_INT16:
|
|
{
|
|
cl_ushort *dPtr = (cl_ushort *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = (float)dPtr[ i ] / 65535.0f;
|
|
break;
|
|
}
|
|
|
|
case CL_SIGNED_INT16:
|
|
{
|
|
cl_short *dPtr = (cl_short *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = (float)dPtr[ i ];
|
|
break;
|
|
}
|
|
|
|
case CL_UNSIGNED_INT16:
|
|
{
|
|
cl_ushort *dPtr = (cl_ushort *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = (float) dPtr[ i ];
|
|
break;
|
|
}
|
|
|
|
case CL_HALF_FLOAT:
|
|
{
|
|
cl_ushort *dPtr = (cl_ushort *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = convert_half_to_float( dPtr[ i ] );
|
|
break;
|
|
}
|
|
|
|
case CL_SIGNED_INT32:
|
|
{
|
|
cl_int *dPtr = (cl_int *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = (float)dPtr[ i ];
|
|
break;
|
|
}
|
|
|
|
case CL_UNSIGNED_INT32:
|
|
{
|
|
cl_uint *dPtr = (cl_uint *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = (float)dPtr[ i ];
|
|
break;
|
|
}
|
|
|
|
case CL_UNORM_SHORT_565:
|
|
{
|
|
cl_ushort *dPtr = (cl_ushort *)ptr;
|
|
tempData[ 0 ] = (float)( dPtr[ 0 ] >> 11 ) / (float)31;
|
|
tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 5 ) & 63 ) / (float)63;
|
|
tempData[ 2 ] = (float)( dPtr[ 0 ] & 31 ) / (float)31;
|
|
break;
|
|
}
|
|
|
|
case CL_UNORM_SHORT_555:
|
|
{
|
|
cl_ushort *dPtr = (cl_ushort *)ptr;
|
|
tempData[ 0 ] = (float)( ( dPtr[ 0 ] >> 10 ) & 31 ) / (float)31;
|
|
tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 5 ) & 31 ) / (float)31;
|
|
tempData[ 2 ] = (float)( dPtr[ 0 ] & 31 ) / (float)31;
|
|
break;
|
|
}
|
|
|
|
case CL_UNORM_INT_101010:
|
|
{
|
|
cl_uint *dPtr = (cl_uint *)ptr;
|
|
tempData[ 0 ] = (float)( ( dPtr[ 0 ] >> 20 ) & 0x3ff ) / (float)1023;
|
|
tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 10 ) & 0x3ff ) / (float)1023;
|
|
tempData[ 2 ] = (float)( dPtr[ 0 ] & 0x3ff ) / (float)1023;
|
|
break;
|
|
}
|
|
|
|
case CL_FLOAT:
|
|
{
|
|
float *dPtr = (float *)ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[ i ] = (float)dPtr[ i ];
|
|
break;
|
|
}
|
|
#ifdef CL_SFIXED14_APPLE
|
|
case CL_SFIXED14_APPLE:
|
|
{
|
|
cl_ushort *dPtr = (cl_ushort*) ptr;
|
|
for( i = 0; i < channelCount; i++ )
|
|
tempData[i] = ((int) dPtr[i] - 16384) * 0x1.0p-14f;
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
|
|
outData[ 3 ] = 1;
|
|
|
|
switch( format->image_channel_order )
|
|
{
|
|
case CL_A:
|
|
outData[ 3 ] = tempData[ 0 ];
|
|
break;
|
|
case CL_R:
|
|
case CL_Rx:
|
|
outData[ 0 ] = tempData[ 0 ];
|
|
break;
|
|
case CL_RA:
|
|
outData[ 0 ] = tempData[ 0 ];
|
|
outData[ 3 ] = tempData[ 1 ];
|
|
break;
|
|
case CL_RG:
|
|
case CL_RGx:
|
|
outData[ 0 ] = tempData[ 0 ];
|
|
outData[ 1 ] = tempData[ 1 ];
|
|
break;
|
|
case CL_RGB:
|
|
case CL_RGBx:
|
|
outData[ 0 ] = tempData[ 0 ];
|
|
outData[ 1 ] = tempData[ 1 ];
|
|
outData[ 2 ] = tempData[ 2 ];
|
|
break;
|
|
case CL_RGBA:
|
|
outData[ 0 ] = tempData[ 0 ];
|
|
outData[ 1 ] = tempData[ 1 ];
|
|
outData[ 2 ] = tempData[ 2 ];
|
|
outData[ 3 ] = tempData[ 3 ];
|
|
break;
|
|
case CL_ARGB:
|
|
outData[ 0 ] = tempData[ 1 ];
|
|
outData[ 1 ] = tempData[ 2 ];
|
|
outData[ 2 ] = tempData[ 3 ];
|
|
outData[ 3 ] = tempData[ 0 ];
|
|
break;
|
|
case CL_BGRA:
|
|
outData[ 0 ] = tempData[ 2 ];
|
|
outData[ 1 ] = tempData[ 1 ];
|
|
outData[ 2 ] = tempData[ 0 ];
|
|
outData[ 3 ] = tempData[ 3 ];
|
|
break;
|
|
case CL_INTENSITY:
|
|
outData[ 0 ] = tempData[ 0 ];
|
|
outData[ 1 ] = tempData[ 0 ];
|
|
outData[ 2 ] = tempData[ 0 ];
|
|
outData[ 3 ] = tempData[ 0 ];
|
|
break;
|
|
case CL_LUMINANCE:
|
|
outData[ 0 ] = tempData[ 0 ];
|
|
outData[ 1 ] = tempData[ 0 ];
|
|
outData[ 2 ] = tempData[ 0 ];
|
|
break;
|
|
#ifdef CL_1RGB_APPLE
|
|
case CL_1RGB_APPLE:
|
|
outData[ 0 ] = tempData[ 1 ];
|
|
outData[ 1 ] = tempData[ 2 ];
|
|
outData[ 2 ] = tempData[ 3 ];
|
|
outData[ 3 ] = 1.0f;
|
|
break;
|
|
#endif
|
|
#ifdef CL_BGR1_APPLE
|
|
case CL_BGR1_APPLE:
|
|
outData[ 0 ] = tempData[ 2 ];
|
|
outData[ 1 ] = tempData[ 1 ];
|
|
outData[ 2 ] = tempData[ 0 ];
|
|
outData[ 3 ] = 1.0f;
|
|
break;
|
|
#endif
|
|
default:
|
|
log_error("Invalid format:");
|
|
print_header(format, true);
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool get_integer_coords( float x, float y, float z, size_t width, size_t height, size_t depth, image_sampler_data *imageSampler, image_descriptor *imageInfo, int &outX, int &outY, int &outZ ) {
|
|
return get_integer_coords_offset(x, y, z, 0.0f, 0.0f, 0.0f, width, height, depth, imageSampler, imageInfo, outX, outY, outZ);
|
|
}
|
|
|
|
bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
|
size_t width, size_t height, size_t depth, image_sampler_data *imageSampler, image_descriptor *imageInfo, int &outX, int &outY, int &outZ )
|
|
{
|
|
AddressFn adFn = sAddressingTable[ imageSampler ];
|
|
|
|
float refX = floorf( x ), refY = floorf( y ), refZ = floorf( z );
|
|
|
|
if( imageSampler->normalized_coords )
|
|
{
|
|
switch (imageSampler->addressing_mode)
|
|
{
|
|
case CL_ADDRESS_REPEAT:
|
|
x = RepeatNormalizedAddressFn( x, width );
|
|
if (height != 0) {
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
|
y = RepeatNormalizedAddressFn( y, height );
|
|
}
|
|
if (depth != 0) {
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
|
z = RepeatNormalizedAddressFn( z, depth );
|
|
}
|
|
|
|
if (xAddressOffset != 0.0) {
|
|
// Add in the offset
|
|
x += xAddressOffset;
|
|
// Handle wrapping
|
|
if (x > width)
|
|
x -= (float)width;
|
|
if (x < 0)
|
|
x += (float)width;
|
|
}
|
|
if ( (yAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) ) {
|
|
// Add in the offset
|
|
y += yAddressOffset;
|
|
// Handle wrapping
|
|
if (y > height)
|
|
y -= (float)height;
|
|
if (y < 0)
|
|
y += (float)height;
|
|
}
|
|
if ( (zAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) ) {
|
|
// Add in the offset
|
|
z += zAddressOffset;
|
|
// Handle wrapping
|
|
if (z > depth)
|
|
z -= (float)depth;
|
|
if (z < 0)
|
|
z += (float)depth;
|
|
}
|
|
break;
|
|
|
|
case CL_ADDRESS_MIRRORED_REPEAT:
|
|
x = MirroredRepeatNormalizedAddressFn( x, width );
|
|
if (height != 0) {
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
|
y = MirroredRepeatNormalizedAddressFn( y, height );
|
|
}
|
|
if (depth != 0) {
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
|
z = MirroredRepeatNormalizedAddressFn( z, depth );
|
|
}
|
|
|
|
if (xAddressOffset != 0.0)
|
|
{
|
|
float temp = x + xAddressOffset;
|
|
if( temp > (float) width )
|
|
temp = (float) width - (temp - (float) width );
|
|
x = fabsf( temp );
|
|
}
|
|
if ( (yAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) ) {
|
|
float temp = y + yAddressOffset;
|
|
if( temp > (float) height )
|
|
temp = (float) height - (temp - (float) height );
|
|
y = fabsf( temp );
|
|
}
|
|
if ( (zAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) ) {
|
|
float temp = z + zAddressOffset;
|
|
if( temp > (float) depth )
|
|
temp = (float) depth - (temp - (float) depth );
|
|
z = fabsf( temp );
|
|
}
|
|
break;
|
|
|
|
default:
|
|
// Also, remultiply to the original coords. This simulates any truncation in
|
|
// the pass to OpenCL
|
|
x = (x * (float)width) + xAddressOffset;
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
|
y = (y * (float)height) + yAddressOffset;
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
|
z = (z * (float)depth) + zAddressOffset;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// At this point, we're dealing with non-normalized coordinates.
|
|
|
|
outX = adFn( floorf( x ), width );
|
|
|
|
// 1D and 2D arrays require special care for the index coordinate:
|
|
|
|
switch (imageInfo->type) {
|
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
|
outY = calculate_array_index(y, (float)imageInfo->arraySize - 1.0f);
|
|
outZ = 0.0f; /* don't care! */
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
|
outY = adFn( floorf( y ), height );
|
|
outZ = calculate_array_index(z, (float)imageInfo->arraySize - 1.0f);
|
|
break;
|
|
default:
|
|
// legacy path:
|
|
if (height != 0)
|
|
outY = adFn( floorf( y ), height );
|
|
if( depth != 0 )
|
|
outZ = adFn( floorf( z ), depth );
|
|
}
|
|
|
|
|
|
|
|
return !( (int)refX == outX && (int)refY == outY && (int)refZ == outZ );
|
|
}
|
|
|
|
// Returns the fractional part of a, measured upward from floorf(a); for
// finite input the result lies in [0, 1).
static float frac(float a)
{
    const float wholePart = floorf( a );
    return a - wholePart;
}
|
|
|
|
static inline void pixelMax( const float a[4], const float b[4], float *results );
|
|
static inline void pixelMax( const float a[4], const float b[4], float *results )
|
|
{
|
|
for( int i = 0; i < 4; i++ )
|
|
results[i] = errMax( fabsf(a[i]), fabsf(b[i]) );
|
|
}
|
|
|
|
// Handles subnormal ("denorm") channel values of a 4-channel pixel.
//
//   a               - the four channel values
//   containsDenorms - NULL: flush every subnormal channel of a[] to a zero of
//                     the same sign, in place.
//                     non-NULL: leave a[] untouched and set *containsDenorms
//                     to 1 if any channel is subnormal. The flag is never
//                     cleared here; callers reset it beforehand.
//
// A channel counts as subnormal when it is non-zero and its magnitude is
// below FLT_MIN. Exact zeros are deliberately excluded: a zero is not a
// denorm, and reporting it as one would make callers treat ordinary black
// pixels as if denormal flushing could have affected them.
static inline void check_for_denorms(float a[4], int *containsDenorms );
static inline void check_for_denorms(float a[4], int *containsDenorms )
{
    for( int i = 0; i < 4; i++ )
    {
        const float magnitude = fabsf( a[i] );

        // Skip zeros, normal values, infinities and NaNs.
        if( magnitude == 0.0f || !( magnitude < FLT_MIN ) )
            continue;

        if( NULL == containsDenorms )
        {
            a[i] = copysignf( 0.0f, a[i] );
        }
        else
        {
            *containsDenorms = 1;
            break;
        }
    }
}
|
|
|
|
// Selects the layer of an image array addressed by coordinate 'coord'.
//
// From Section 8.4 of the 1.2 Spec, 'Selecting an Image from an Image Array':
// given coordinate 'w' that represents an index,
//     layer_index = clamp( floor(w + 0.5f), 0.0f, max_value_for_w )
//
//   coord  - the array-index coordinate
//   extent - the largest valid layer index (max_value_for_w)
inline float calculate_array_index( float coord, float extent )
{
    float layer = floorf( coord + 0.5f );

    // Upper bound first, then lower bound.
    if( layer > extent )
        layer = extent;
    if( layer < 0.0f )
        layer = 0.0f;

    return layer;
}
|
|
|
|
/*
|
|
* Utility function to unnormalized a coordinate given a particular sampler.
|
|
*
|
|
* name - the name of the coordinate, used for verbose debugging only
|
|
* coord - the coordinate requiring unnormalization
|
|
* offset - an addressing offset to be added to the coordinate
|
|
* extent - the max value for this coordinate (e.g. width for x)
|
|
*/
|
|
static float unnormalize_coordinate( const char* name, float coord,
|
|
float offset, float extent, cl_addressing_mode addressing_mode, int verbose )
|
|
{
|
|
float ret = 0.0f;
|
|
|
|
switch (addressing_mode) {
|
|
case CL_ADDRESS_REPEAT:
|
|
ret = RepeatNormalizedAddressFn( coord, extent );
|
|
|
|
if ( verbose ) {
|
|
log_info( "\tRepeat filter denormalizes %s (%f) to %f\n",
|
|
name, coord, ret );
|
|
}
|
|
|
|
if (offset != 0.0) {
|
|
// Add in the offset, and handle wrapping.
|
|
ret += offset;
|
|
if (ret > extent) ret -= extent;
|
|
if (ret < 0.0) ret += extent;
|
|
}
|
|
|
|
if (verbose && offset != 0.0f) {
|
|
log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
|
|
}
|
|
break;
|
|
|
|
case CL_ADDRESS_MIRRORED_REPEAT:
|
|
ret = MirroredRepeatNormalizedAddressFn( coord, extent );
|
|
|
|
if ( verbose ) {
|
|
log_info( "\tMirrored repeat filter denormalizes %s (%f) to %f\n",
|
|
name, coord, ret );
|
|
}
|
|
|
|
if (offset != 0.0) {
|
|
float temp = ret + offset;
|
|
if( temp > extent )
|
|
temp = extent - (temp - extent );
|
|
ret = fabsf( temp );
|
|
}
|
|
|
|
if (verbose && offset != 0.0f) {
|
|
log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
|
|
}
|
|
break;
|
|
|
|
default:
|
|
|
|
ret = coord * extent;
|
|
|
|
if ( verbose ) {
|
|
log_info( "\tFilter denormalizes %s (%f) to %f\n",
|
|
name, coord, ret );
|
|
}
|
|
|
|
ret += offset;
|
|
|
|
if (verbose && offset != 0.0f) {
|
|
log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
// Samples the image at (x, y, z) with no addressing offset. This is
// sample_image_pixel_float_offset() with all three offsets set to zero; the
// sampled pixel is written to outData and that function's result is returned
// unchanged.
FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
                                    float x, float y, float z,
                                    image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms )
{
    const float noOffset = 0.0f;

    return sample_image_pixel_float_offset( imageData, imageInfo,
                                            x, y, z,
                                            noOffset, noOffset, noOffset,
                                            imageSampler, outData, verbose, containsDenorms );
}
|
|
|
|
// returns max pixel value of the pixels touched
|
|
FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
|
|
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
|
image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms )
|
|
{
|
|
AddressFn adFn = sAddressingTable[ imageSampler ];
|
|
FloatPixel returnVal;
|
|
|
|
if( containsDenorms )
|
|
*containsDenorms = 0;
|
|
|
|
if( imageSampler->normalized_coords ) {
|
|
|
|
// We need to unnormalize our coordinates differently depending on
|
|
// the image type, but 'x' is always processed the same way.
|
|
|
|
x = unnormalize_coordinate("x", x, xAddressOffset, (float)imageInfo->width,
|
|
imageSampler->addressing_mode, verbose);
|
|
|
|
switch (imageInfo->type) {
|
|
|
|
// The image array types require special care:
|
|
|
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
|
z = 0; // don't care -- unused for 1D arrays
|
|
break;
|
|
|
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
|
y = unnormalize_coordinate("y", y, yAddressOffset, (float)imageInfo->height,
|
|
imageSampler->addressing_mode, verbose);
|
|
break;
|
|
|
|
// Everybody else:
|
|
|
|
default:
|
|
y = unnormalize_coordinate("y", y, yAddressOffset, (float)imageInfo->height,
|
|
imageSampler->addressing_mode, verbose);
|
|
z = unnormalize_coordinate("z", z, zAddressOffset, (float)imageInfo->depth,
|
|
imageSampler->addressing_mode, verbose);
|
|
}
|
|
|
|
} else if ( verbose ) {
|
|
|
|
switch (imageInfo->type) {
|
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
|
log_info("Starting coordinate: %f, array index %f\n", x, y);
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
|
log_info("Starting coordinate: %f, %f, array index %f\n", x, y, z);
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE1D:
|
|
case CL_MEM_OBJECT_IMAGE1D_BUFFER:
|
|
log_info("Starting coordinate: %f\b", x);
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE2D:
|
|
log_info("Starting coordinate: %f, %f\n", x, y);
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE3D:
|
|
default:
|
|
log_info("Starting coordinate: %f, %f, %f\n", x, y, z);
|
|
}
|
|
}
|
|
|
|
// At this point, we have unnormalized coordinates.
|
|
|
|
if( imageSampler->filter_mode == CL_FILTER_NEAREST )
|
|
{
|
|
int ix, iy, iz;
|
|
|
|
// We apply the addressing function to the now-unnormalized
|
|
// coordinates. Note that the array cases again require special
|
|
// care, per section 8.4 in the OpenCL 1.2 Specification.
|
|
|
|
ix = adFn( floorf( x ), imageInfo->width );
|
|
|
|
switch (imageInfo->type) {
|
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
|
iy = calculate_array_index( y, (float)(imageInfo->arraySize - 1) );
|
|
iz = 0;
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
|
iy = adFn( floorf( y ), imageInfo->height );
|
|
iz = calculate_array_index( z, (float)(imageInfo->arraySize - 1) );
|
|
break;
|
|
default:
|
|
iy = adFn( floorf( y ), imageInfo->height );
|
|
if( imageInfo->depth != 0 )
|
|
iz = adFn( floorf( z ), imageInfo->depth );
|
|
else
|
|
iz = 0;
|
|
}
|
|
|
|
if( verbose ) {
|
|
if( iz )
|
|
log_info( "\tActual integer coords used (i = floor(x)): { %d, %d, %d }\n", ix, iy, iz );
|
|
else
|
|
log_info( "\tActual integer coords used (i = floor(x)): { %d, %d }\n", ix, iy );
|
|
}
|
|
|
|
read_image_pixel_float( imageData, imageInfo, ix, iy, iz, outData );
|
|
check_for_denorms( outData, containsDenorms );
|
|
for( int i = 0; i < 4; i++ )
|
|
returnVal.p[i] = fabsf( outData[i] );
|
|
return returnVal;
|
|
}
|
|
else
|
|
{
|
|
// Linear filtering cases.
|
|
|
|
size_t width = imageInfo->width, height = imageInfo->height, depth = imageInfo->depth;
|
|
|
|
// Image arrays can use 2D filtering, but require us to walk into the
|
|
// image a certain number of slices before reading.
|
|
|
|
if( depth == 0 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
|
|
imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
|
{
|
|
size_t layer_offset = 0;
|
|
|
|
if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
|
|
layer_offset = imageInfo->slicePitch * (size_t)calculate_array_index(
|
|
z, (float)(imageInfo->arraySize - 1)
|
|
);
|
|
}
|
|
else if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
|
|
layer_offset = imageInfo->slicePitch * (size_t)calculate_array_index(
|
|
y, (float)(imageInfo->arraySize - 1)
|
|
);
|
|
|
|
// Set up y and height so that the filtering below is correct
|
|
// 1D filtering on a single slice.
|
|
height = 1;
|
|
}
|
|
|
|
int x1 = adFn( floorf( x - 0.5f ), width );
|
|
int y1 = 0;
|
|
int x2 = adFn( floorf( x - 0.5f ) + 1, width );
|
|
int y2 = 0;
|
|
if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D) &&
|
|
(imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
|
|
(imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
|
|
y1 = adFn( floorf( y - 0.5f ), height );
|
|
y2 = adFn( floorf( y - 0.5f ) + 1, height );
|
|
} else {
|
|
y = 0.5f;
|
|
}
|
|
|
|
if( verbose )
|
|
log_info( "\tActual integer coords used (i = floor(x-.5)): i0:{%d, %d } and i1:{%d, %d }\n", x1, y1, x2, y2 );
|
|
|
|
// Walk to beginning of the 'correct' slice, if needed.
|
|
char* imgPtr = ((char*)imageData) + layer_offset;
|
|
|
|
float upLeft[ 4 ], upRight[ 4 ], lowLeft[ 4 ], lowRight[ 4 ];
|
|
float maxUp[4], maxLow[4];
|
|
read_image_pixel_float( imgPtr, imageInfo, x1, y1, 0, upLeft );
|
|
read_image_pixel_float( imgPtr, imageInfo, x2, y1, 0, upRight );
|
|
check_for_denorms( upLeft, containsDenorms );
|
|
check_for_denorms( upRight, containsDenorms );
|
|
pixelMax( upLeft, upRight, maxUp );
|
|
read_image_pixel_float( imgPtr, imageInfo, x1, y2, 0, lowLeft );
|
|
read_image_pixel_float( imgPtr, imageInfo, x2, y2, 0, lowRight );
|
|
check_for_denorms( lowLeft, containsDenorms );
|
|
check_for_denorms( lowRight, containsDenorms );
|
|
pixelMax( lowLeft, lowRight, maxLow );
|
|
pixelMax( maxUp, maxLow, returnVal.p );
|
|
|
|
if( verbose )
|
|
{
|
|
if( NULL == containsDenorms )
|
|
log_info( "\tSampled pixels (rgba order, denorms flushed to zero):\n" );
|
|
else
|
|
log_info( "\tSampled pixels (rgba order):\n" );
|
|
log_info( "\t\tp00: %f, %f, %f, %f\n", upLeft[0], upLeft[1], upLeft[2], upLeft[3] );
|
|
log_info( "\t\tp01: %f, %f, %f, %f\n", upRight[0], upRight[1], upRight[2], upRight[3] );
|
|
log_info( "\t\tp10: %f, %f, %f, %f\n", lowLeft[0], lowLeft[1], lowLeft[2], lowLeft[3] );
|
|
log_info( "\t\tp11: %f, %f, %f, %f\n", lowRight[0], lowRight[1], lowRight[2], lowRight[3] );
|
|
}
|
|
|
|
bool printMe = false;
|
|
if( x1 <= 0 || x2 <= 0 || x1 >= (int)width-1 || x2 >= (int)width-1 )
|
|
printMe = true;
|
|
if( y1 <= 0 || y2 <= 0 || y1 >= (int)height-1 || y2 >= (int)height-1 )
|
|
printMe = true;
|
|
|
|
double weights[ 2 ][ 2 ];
|
|
|
|
weights[ 0 ][ 0 ] = weights[ 0 ][ 1 ] = 1.0 - frac( x - 0.5f );
|
|
weights[ 1 ][ 0 ] = weights[ 1 ][ 1 ] = frac( x - 0.5f );
|
|
weights[ 0 ][ 0 ] *= 1.0 - frac( y - 0.5f );
|
|
weights[ 1 ][ 0 ] *= 1.0 - frac( y - 0.5f );
|
|
weights[ 0 ][ 1 ] *= frac( y - 0.5f );
|
|
weights[ 1 ][ 1 ] *= frac( y - 0.5f );
|
|
|
|
if( verbose )
|
|
log_info( "\tfrac( x - 0.5f ) = %f, frac( y - 0.5f ) = %f\n", frac( x - 0.5f ), frac( y - 0.5f ) );
|
|
|
|
for( int i = 0; i < 4; i++ )
|
|
{
|
|
outData[ i ] = (float)( ( upLeft[ i ] * weights[ 0 ][ 0 ] ) +
|
|
( upRight[ i ] * weights[ 1 ][ 0 ] ) +
|
|
( lowLeft[ i ] * weights[ 0 ][ 1 ] ) +
|
|
( lowRight[ i ] * weights[ 1 ][ 1 ] ));
|
|
|
|
// flush subnormal results to zero if necessary
|
|
if( NULL == containsDenorms && fabs(outData[i]) < FLT_MIN )
|
|
outData[i] = copysignf( 0.0f, outData[i] );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// 3D linear filtering
|
|
int x1 = adFn( floorf( x - 0.5f ), width );
|
|
int y1 = adFn( floorf( y - 0.5f ), height );
|
|
int z1 = adFn( floorf( z - 0.5f ), depth );
|
|
int x2 = adFn( floorf( x - 0.5f ) + 1, width );
|
|
int y2 = adFn( floorf( y - 0.5f ) + 1, height );
|
|
int z2 = adFn( floorf( z - 0.5f ) + 1, depth );
|
|
|
|
if( verbose )
|
|
log_info( "\tActual integer coords used (i = floor(x-.5)): i0:{%d, %d, %d} and i1:{%d, %d, %d}\n", x1, y1, z1, x2, y2, z2 );
|
|
|
|
float upLeftA[ 4 ], upRightA[ 4 ], lowLeftA[ 4 ], lowRightA[ 4 ];
|
|
float upLeftB[ 4 ], upRightB[ 4 ], lowLeftB[ 4 ], lowRightB[ 4 ];
|
|
float pixelMaxA[4], pixelMaxB[4];
|
|
read_image_pixel_float( imageData, imageInfo, x1, y1, z1, upLeftA );
|
|
read_image_pixel_float( imageData, imageInfo, x2, y1, z1, upRightA );
|
|
check_for_denorms( upLeftA, containsDenorms );
|
|
check_for_denorms( upRightA, containsDenorms );
|
|
pixelMax( upLeftA, upRightA, pixelMaxA );
|
|
read_image_pixel_float( imageData, imageInfo, x1, y2, z1, lowLeftA );
|
|
read_image_pixel_float( imageData, imageInfo, x2, y2, z1, lowRightA );
|
|
check_for_denorms( lowLeftA, containsDenorms );
|
|
check_for_denorms( lowRightA, containsDenorms );
|
|
pixelMax( lowLeftA, lowRightA, pixelMaxB );
|
|
pixelMax( pixelMaxA, pixelMaxB, returnVal.p);
|
|
read_image_pixel_float( imageData, imageInfo, x1, y1, z2, upLeftB );
|
|
read_image_pixel_float( imageData, imageInfo, x2, y1, z2, upRightB );
|
|
check_for_denorms( upLeftB, containsDenorms );
|
|
check_for_denorms( upRightB, containsDenorms );
|
|
pixelMax( upLeftB, upRightB, pixelMaxA );
|
|
read_image_pixel_float( imageData, imageInfo, x1, y2, z2, lowLeftB );
|
|
read_image_pixel_float( imageData, imageInfo, x2, y2, z2, lowRightB );
|
|
check_for_denorms( lowLeftB, containsDenorms );
|
|
check_for_denorms( lowRightB, containsDenorms );
|
|
pixelMax( lowLeftB, lowRightB, pixelMaxB );
|
|
pixelMax( pixelMaxA, pixelMaxB, pixelMaxA);
|
|
pixelMax( pixelMaxA, returnVal.p, returnVal.p );
|
|
|
|
if( verbose )
|
|
{
|
|
if( NULL == containsDenorms )
|
|
log_info( "\tSampled pixels (rgba order, denorms flushed to zero):\n" );
|
|
else
|
|
log_info( "\tSampled pixels (rgba order):\n" );
|
|
log_info( "\t\tp000: %f, %f, %f, %f\n", upLeftA[0], upLeftA[1], upLeftA[2], upLeftA[3] );
|
|
log_info( "\t\tp001: %f, %f, %f, %f\n", upRightA[0], upRightA[1], upRightA[2], upRightA[3] );
|
|
log_info( "\t\tp010: %f, %f, %f, %f\n", lowLeftA[0], lowLeftA[1], lowLeftA[2], lowLeftA[3] );
|
|
log_info( "\t\tp011: %f, %f, %f, %f\n\n", lowRightA[0], lowRightA[1], lowRightA[2], lowRightA[3] );
|
|
log_info( "\t\tp100: %f, %f, %f, %f\n", upLeftB[0], upLeftB[1], upLeftB[2], upLeftB[3] );
|
|
log_info( "\t\tp101: %f, %f, %f, %f\n", upRightB[0], upRightB[1], upRightB[2], upRightB[3] );
|
|
log_info( "\t\tp110: %f, %f, %f, %f\n", lowLeftB[0], lowLeftB[1], lowLeftB[2], lowLeftB[3] );
|
|
log_info( "\t\tp111: %f, %f, %f, %f\n", lowRightB[0], lowRightB[1], lowRightB[2], lowRightB[3] );
|
|
}
|
|
|
|
double weights[ 2 ][ 2 ][ 2 ];
|
|
|
|
float a = frac( x - 0.5f ), b = frac( y - 0.5f ), c = frac( z - 0.5f );
|
|
weights[ 0 ][ 0 ][ 0 ] = weights[ 0 ][ 1 ][ 0 ] = weights[ 0 ][ 0 ][ 1 ] = weights[ 0 ][ 1 ][ 1 ] = 1.f - a;
|
|
weights[ 1 ][ 0 ][ 0 ] = weights[ 1 ][ 1 ][ 0 ] = weights[ 1 ][ 0 ][ 1 ] = weights[ 1 ][ 1 ][ 1 ] = a;
|
|
weights[ 0 ][ 0 ][ 0 ] *= 1.f - b;
|
|
weights[ 1 ][ 0 ][ 0 ] *= 1.f - b;
|
|
weights[ 0 ][ 0 ][ 1 ] *= 1.f - b;
|
|
weights[ 1 ][ 0 ][ 1 ] *= 1.f - b;
|
|
weights[ 0 ][ 1 ][ 0 ] *= b;
|
|
weights[ 1 ][ 1 ][ 0 ] *= b;
|
|
weights[ 0 ][ 1 ][ 1 ] *= b;
|
|
weights[ 1 ][ 1 ][ 1 ] *= b;
|
|
weights[ 0 ][ 0 ][ 0 ] *= 1.f - c;
|
|
weights[ 0 ][ 1 ][ 0 ] *= 1.f - c;
|
|
weights[ 1 ][ 0 ][ 0 ] *= 1.f - c;
|
|
weights[ 1 ][ 1 ][ 0 ] *= 1.f - c;
|
|
weights[ 0 ][ 0 ][ 1 ] *= c;
|
|
weights[ 0 ][ 1 ][ 1 ] *= c;
|
|
weights[ 1 ][ 0 ][ 1 ] *= c;
|
|
weights[ 1 ][ 1 ][ 1 ] *= c;
|
|
|
|
if( verbose )
|
|
log_info( "\tfrac( x - 0.5f ) = %f, frac( y - 0.5f ) = %f, frac( z - 0.5f ) = %f\n",
|
|
frac( x - 0.5f ), frac( y - 0.5f ), frac( z - 0.5f ) );
|
|
|
|
for( int i = 0; i < 4; i++ )
|
|
{
|
|
outData[ i ] = (float)( ( upLeftA[ i ] * weights[ 0 ][ 0 ][ 0 ] ) +
|
|
( upRightA[ i ] * weights[ 1 ][ 0 ][ 0 ] ) +
|
|
( lowLeftA[ i ] * weights[ 0 ][ 1 ][ 0 ] ) +
|
|
( lowRightA[ i ] * weights[ 1 ][ 1 ][ 0 ] ) +
|
|
( upLeftB[ i ] * weights[ 0 ][ 0 ][ 1 ] ) +
|
|
( upRightB[ i ] * weights[ 1 ][ 0 ][ 1 ] ) +
|
|
( lowLeftB[ i ] * weights[ 0 ][ 1 ][ 1 ] ) +
|
|
( lowRightB[ i ] * weights[ 1 ][ 1 ][ 1 ] ));
|
|
|
|
// flush subnormal results to zero if necessary
|
|
if( NULL == containsDenorms && fabs(outData[i]) < FLT_MIN )
|
|
outData[i] = copysignf( 0.0f, outData[i] );
|
|
}
|
|
}
|
|
|
|
return returnVal;
|
|
}
|
|
}
|
|
|
|
|
|
int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
|
|
void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ )
|
|
{
|
|
int foundCount = 0;
|
|
char *iPtr = (char *)imagePtr;
|
|
size_t width;
|
|
size_t depth;
|
|
size_t height;
|
|
|
|
switch (imageInfo->type)
|
|
{
|
|
case CL_MEM_OBJECT_IMAGE1D:
|
|
width = imageInfo->width; height = 1; depth = 1;
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
|
width = imageInfo->width; height = 1; depth = imageInfo->arraySize;
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE2D:
|
|
width = imageInfo->width; height = imageInfo->height; depth = 1;
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
|
width = imageInfo->width; height = imageInfo->height; depth = imageInfo->arraySize;
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE3D:
|
|
width = imageInfo->width; height = imageInfo->height; depth = imageInfo->depth;
|
|
break;
|
|
}
|
|
|
|
for( size_t z = 0; z < depth; z++ )
|
|
{
|
|
for( size_t y = 0; y < height; y++ )
|
|
{
|
|
for( size_t x = 0; x < imageInfo->width; x++)
|
|
{
|
|
if( memcmp( iPtr, vectorToFind, vectorSize ) == 0 )
|
|
{
|
|
if( foundCount == 0 )
|
|
{
|
|
*outX = (int)x;
|
|
if (outY != NULL)
|
|
*outY = (int)y;
|
|
if( outZ != NULL )
|
|
*outZ = (int)z;
|
|
}
|
|
foundCount++;
|
|
}
|
|
iPtr += vectorSize;
|
|
}
|
|
iPtr += imageInfo->rowPitch - ( width * vectorSize );
|
|
}
|
|
iPtr += imageInfo->slicePitch - ( height * imageInfo->rowPitch );
|
|
}
|
|
return foundCount;
|
|
}
|
|
|
|
int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
|
|
unsigned int *valuesToFind, int *outX, int *outY, int *outZ )
|
|
{
|
|
char vectorToFind[ 4 * 4 ];
|
|
size_t vectorSize = get_format_channel_count( imageInfo->format );
|
|
|
|
|
|
if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT8 )
|
|
{
|
|
unsigned char *p = (unsigned char *)vectorToFind;
|
|
for( unsigned int i = 0; i < vectorSize; i++ )
|
|
p[i] = (unsigned char)valuesToFind[i];
|
|
}
|
|
else if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT16 )
|
|
{
|
|
unsigned short *p = (unsigned short *)vectorToFind;
|
|
for( unsigned int i = 0; i < vectorSize; i++ )
|
|
p[i] = (unsigned short)valuesToFind[i];
|
|
vectorSize *= 2;
|
|
}
|
|
else if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT32 )
|
|
{
|
|
unsigned int *p = (unsigned int *)vectorToFind;
|
|
for( unsigned int i = 0; i < vectorSize; i++ )
|
|
p[i] = (unsigned int)valuesToFind[i];
|
|
vectorSize *= 4;
|
|
}
|
|
else
|
|
{
|
|
log_info( "WARNING: Unable to search for debug pixel: invalid image format\n" );
|
|
return false;
|
|
}
|
|
return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ );
|
|
}
|
|
|
|
int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
|
|
int *valuesToFind, int *outX, int *outY, int *outZ )
|
|
{
|
|
char vectorToFind[ 4 * 4 ];
|
|
size_t vectorSize = get_format_channel_count( imageInfo->format );
|
|
|
|
if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT8 )
|
|
{
|
|
char *p = (char *)vectorToFind;
|
|
for( unsigned int i = 0; i < vectorSize; i++ )
|
|
p[i] = (char)valuesToFind[i];
|
|
}
|
|
else if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT16 )
|
|
{
|
|
short *p = (short *)vectorToFind;
|
|
for( unsigned int i = 0; i < vectorSize; i++ )
|
|
p[i] = (short)valuesToFind[i];
|
|
vectorSize *= 2;
|
|
}
|
|
else if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT32 )
|
|
{
|
|
int *p = (int *)vectorToFind;
|
|
for( unsigned int i = 0; i < vectorSize; i++ )
|
|
p[i] = (int)valuesToFind[i];
|
|
vectorSize *= 4;
|
|
}
|
|
else
|
|
{
|
|
log_info( "WARNING: Unable to search for debug pixel: invalid image format\n" );
|
|
return false;
|
|
}
|
|
return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ );
|
|
}
|
|
|
|
int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
|
|
float *valuesToFind, int *outX, int *outY, int *outZ )
|
|
{
|
|
char vectorToFind[ 4 * 4 ];
|
|
float swizzled[4];
|
|
memcpy( swizzled, valuesToFind, sizeof( swizzled ) );
|
|
size_t vectorSize = get_pixel_size( imageInfo->format );
|
|
pack_image_pixel( swizzled, imageInfo->format, vectorToFind );
|
|
return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ );
|
|
}
|
|
|
|
template <class T> void swizzle_vector_for_image( T *srcVector, const cl_image_format *imageFormat )
|
|
{
|
|
T temp;
|
|
switch( imageFormat->image_channel_order )
|
|
{
|
|
case CL_A:
|
|
srcVector[ 0 ] = srcVector[ 3 ];
|
|
break;
|
|
case CL_R:
|
|
case CL_Rx:
|
|
case CL_RG:
|
|
case CL_RGx:
|
|
case CL_RGB:
|
|
case CL_RGBx:
|
|
case CL_RGBA:
|
|
break;
|
|
case CL_RA:
|
|
srcVector[ 1 ] = srcVector[ 3 ];
|
|
break;
|
|
case CL_ARGB:
|
|
temp = srcVector[ 3 ];
|
|
srcVector[ 3 ] = srcVector[ 2 ];
|
|
srcVector[ 2 ] = srcVector[ 1 ];
|
|
srcVector[ 1 ] = srcVector[ 0 ];
|
|
srcVector[ 0 ] = temp;
|
|
break;
|
|
case CL_BGRA:
|
|
temp = srcVector[ 0 ];
|
|
srcVector[ 0 ] = srcVector[ 2 ];
|
|
srcVector[ 2 ] = temp;
|
|
break;
|
|
case CL_INTENSITY:
|
|
srcVector[ 3 ] = srcVector[ 0 ];
|
|
srcVector[ 2 ] = srcVector[ 0 ];
|
|
srcVector[ 1 ] = srcVector[ 0 ];
|
|
break;
|
|
case CL_LUMINANCE:
|
|
srcVector[ 2 ] = srcVector[ 0 ];
|
|
srcVector[ 1 ] = srcVector[ 0 ];
|
|
break;
|
|
#ifdef CL_1RGB_APPLE
|
|
case CL_1RGB_APPLE:
|
|
temp = srcVector[ 3 ];
|
|
srcVector[ 3 ] = srcVector[ 2 ];
|
|
srcVector[ 2 ] = srcVector[ 1 ];
|
|
srcVector[ 1 ] = srcVector[ 0 ];
|
|
srcVector[ 0 ] = temp;
|
|
break;
|
|
#endif
|
|
#ifdef CL_BGR1_APPLE
|
|
case CL_BGR1_APPLE:
|
|
temp = srcVector[ 0 ];
|
|
srcVector[ 0 ] = srcVector[ 2 ];
|
|
srcVector[ 2 ] = temp;
|
|
break;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
// Clamps v to the closed range [min, max]. Every argument is parenthesized so
// that compound expressions (e.g. a conditional or a sum) clamp as a whole.
// Note that v may be evaluated more than once.
#define SATURATE( v, min, max ) ( (v) < (min) ? (min) : ( (v) > (max) ? (max) : (v) ) )
|
|
|
|
void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData )
|
|
{
|
|
swizzle_vector_for_image<unsigned int>( srcVector, imageFormat );
|
|
size_t channelCount = get_format_channel_count( imageFormat );
|
|
|
|
switch( imageFormat->image_channel_data_type )
|
|
{
|
|
case CL_UNSIGNED_INT8:
|
|
{
|
|
unsigned char *ptr = (unsigned char *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (unsigned char)SATURATE( srcVector[ i ], 0, 255 );
|
|
break;
|
|
}
|
|
case CL_UNSIGNED_INT16:
|
|
{
|
|
unsigned short *ptr = (unsigned short *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (unsigned short)SATURATE( srcVector[ i ], 0, 65535 );
|
|
break;
|
|
}
|
|
case CL_UNSIGNED_INT32:
|
|
{
|
|
unsigned int *ptr = (unsigned int *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (unsigned int)srcVector[ i ];
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData )
|
|
{
|
|
swizzle_vector_for_image<int>( srcVector, imageFormat );
|
|
size_t chanelCount = get_format_channel_count( imageFormat );
|
|
|
|
switch( imageFormat->image_channel_data_type )
|
|
{
|
|
case CL_SIGNED_INT8:
|
|
{
|
|
char *ptr = (char *)outData;
|
|
for( unsigned int i = 0; i < chanelCount; i++ )
|
|
ptr[ i ] = (char)SATURATE( srcVector[ i ], -128, 127 );
|
|
break;
|
|
}
|
|
case CL_SIGNED_INT16:
|
|
{
|
|
short *ptr = (short *)outData;
|
|
for( unsigned int i = 0; i < chanelCount; i++ )
|
|
ptr[ i ] = (short)SATURATE( srcVector[ i ], -32768, 32767 );
|
|
break;
|
|
}
|
|
case CL_SIGNED_INT32:
|
|
{
|
|
int *ptr = (int *)outData;
|
|
for( unsigned int i = 0; i < chanelCount; i++ )
|
|
ptr[ i ] = (int)srcVector[ i ];
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
int round_to_even( float v )
|
|
{
|
|
// clamp overflow
|
|
if( v >= - (float) INT_MIN )
|
|
return INT_MAX;
|
|
if( v <= (float) INT_MIN )
|
|
return INT_MIN;
|
|
|
|
// round fractional values to integer value
|
|
if( fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) )
|
|
{
|
|
static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23), MAKE_HEX_FLOAT(-0x1.0p23f, -0x1L, 23) };
|
|
float magicVal = magic[ v < 0.0f ];
|
|
v += magicVal;
|
|
v -= magicVal;
|
|
}
|
|
|
|
return (int) v;
|
|
}
|
|
|
|
// Float -> storage conversion helpers. Every macro argument is parenthesized
// so compound expressions expand as intended; arguments may be evaluated more
// than once, so they must not have side effects.
//
// NORMALIZE                  - clamps v to [0, 1], scales by max, rounds with round_to_even().
// NORMALIZE_UNROUNDED        - as NORMALIZE, but leaves the scaled value unrounded.
// NORMALIZE_SIGNED           - v < -1 yields min, v > 1 yields max, otherwise round_to_even( v * max ).
// NORMALIZE_SIGNED_UNROUNDED - as NORMALIZE_SIGNED, but leaves the scaled value unrounded.
// CONVERT_INT                - v < min yields min, v > max yields max_val, otherwise round_to_even( v ).
// CONVERT_UINT               - v < 0 yields 0, v > max yields max_val, otherwise round_to_even( v ).
#define NORMALIZE( v, max ) ( (v) < 0 ? 0 : ( (v) > 1.f ? (max) : round_to_even( (v) * (max) ) ) )
#define NORMALIZE_UNROUNDED( v, max ) ( (v) < 0 ? 0 : ( (v) > 1.f ? (max) : (v) * (max) ) )
#define NORMALIZE_SIGNED( v, min, max ) ( (v) < -1.0f ? (min) : ( (v) > 1.f ? (max) : round_to_even( (v) * (max) ) ) )
#define NORMALIZE_SIGNED_UNROUNDED( v, min, max ) ( (v) < -1.0f ? (min) : ( (v) > 1.f ? (max) : (v) * (max) ) )
#define CONVERT_INT( v, min, max, max_val) ( (v) < (min) ? (min) : ( (v) > (max) ? (max_val) : round_to_even( v ) ) )
#define CONVERT_UINT( v, max, max_val) ( (v) < 0 ? 0 : ( (v) > (max) ? (max_val) : round_to_even( v ) ) )
|
|
|
|
void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData )
|
|
{
|
|
swizzle_vector_for_image<float>( srcVector, imageFormat );
|
|
size_t channelCount = get_format_channel_count( imageFormat );
|
|
switch( imageFormat->image_channel_data_type )
|
|
{
|
|
case CL_HALF_FLOAT:
|
|
{
|
|
cl_ushort *ptr = (cl_ushort *)outData;
|
|
|
|
switch( gFloatToHalfRoundingMode )
|
|
{
|
|
case kRoundToNearestEven:
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = float2half_rte( srcVector[ i ] );
|
|
break;
|
|
case kRoundTowardZero:
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = float2half_rtz( srcVector[ i ] );
|
|
break;
|
|
default:
|
|
log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" );
|
|
exit(-1);
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
|
|
case CL_FLOAT:
|
|
{
|
|
cl_float *ptr = (cl_float *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = srcVector[ i ];
|
|
break;
|
|
}
|
|
|
|
case CL_SNORM_INT8:
|
|
{
|
|
cl_char *ptr = (cl_char *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (char)NORMALIZE_SIGNED( srcVector[ i ], -127.0f, 127.f );
|
|
break;
|
|
}
|
|
case CL_SNORM_INT16:
|
|
{
|
|
cl_short *ptr = (cl_short *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (short)NORMALIZE_SIGNED( srcVector[ i ], -32767.f, 32767.f );
|
|
break;
|
|
}
|
|
case CL_UNORM_INT8:
|
|
{
|
|
cl_uchar *ptr = (cl_uchar *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (unsigned char)NORMALIZE( srcVector[ i ], 255.f );
|
|
#ifdef CL_1RGB_APPLE
|
|
if( imageFormat->image_channel_order == CL_1RGB_APPLE )
|
|
ptr[0] = 255.0f;
|
|
#endif
|
|
#ifdef CL_BGR1_APPLE
|
|
if( imageFormat->image_channel_order == CL_BGR1_APPLE )
|
|
ptr[3] = 255.0f;
|
|
#endif
|
|
break;
|
|
}
|
|
case CL_UNORM_INT16:
|
|
{
|
|
cl_ushort *ptr = (cl_ushort *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (unsigned short)NORMALIZE( srcVector[ i ], 65535.f );
|
|
break;
|
|
}
|
|
case CL_UNORM_SHORT_555:
|
|
{
|
|
cl_ushort *ptr = (cl_ushort *)outData;
|
|
ptr[ 0 ] = ( ( (unsigned short)NORMALIZE( srcVector[ 0 ], 31.f ) & 31 ) << 10 ) |
|
|
( ( (unsigned short)NORMALIZE( srcVector[ 1 ], 31.f ) & 31 ) << 5 ) |
|
|
( ( (unsigned short)NORMALIZE( srcVector[ 2 ], 31.f ) & 31 ) << 0 );
|
|
break;
|
|
}
|
|
case CL_UNORM_SHORT_565:
|
|
{
|
|
cl_ushort *ptr = (cl_ushort *)outData;
|
|
ptr[ 0 ] = ( ( (unsigned short)NORMALIZE( srcVector[ 0 ], 31.f ) & 31 ) << 11 ) |
|
|
( ( (unsigned short)NORMALIZE( srcVector[ 1 ], 63.f ) & 63 ) << 5 ) |
|
|
( ( (unsigned short)NORMALIZE( srcVector[ 2 ], 31.f ) & 31 ) << 0 );
|
|
break;
|
|
}
|
|
case CL_UNORM_INT_101010:
|
|
{
|
|
cl_uint *ptr = (cl_uint *)outData;
|
|
ptr[ 0 ] = ( ( (unsigned int)NORMALIZE( srcVector[ 0 ], 1023.f ) & 1023 ) << 20 ) |
|
|
( ( (unsigned int)NORMALIZE( srcVector[ 1 ], 1023.f ) & 1023 ) << 10 ) |
|
|
( ( (unsigned int)NORMALIZE( srcVector[ 2 ], 1023.f ) & 1023 ) << 0 );
|
|
break;
|
|
}
|
|
case CL_SIGNED_INT8:
|
|
{
|
|
cl_char *ptr = (cl_char *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (char)CONVERT_INT( srcVector[ i ], -127.0f, 127.f, 127 );
|
|
break;
|
|
}
|
|
case CL_SIGNED_INT16:
|
|
{
|
|
cl_short *ptr = (cl_short *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (short)CONVERT_INT( srcVector[ i ], -32767.f, 32767.f, 32767 );
|
|
break;
|
|
}
|
|
case CL_SIGNED_INT32:
|
|
{
|
|
cl_int *ptr = (cl_int *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (int)CONVERT_INT( srcVector[ i ], MAKE_HEX_FLOAT( -0x1.0p31f, -1, 31), MAKE_HEX_FLOAT( 0x1.fffffep30f, 0x1fffffe, 30-23), CL_INT_MAX );
|
|
break;
|
|
}
|
|
case CL_UNSIGNED_INT8:
|
|
{
|
|
cl_uchar *ptr = (cl_uchar *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (cl_uchar)CONVERT_UINT( srcVector[ i ], 255.f, CL_UCHAR_MAX );
|
|
break;
|
|
}
|
|
case CL_UNSIGNED_INT16:
|
|
{
|
|
cl_ushort *ptr = (cl_ushort *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (cl_ushort)CONVERT_UINT( srcVector[ i ], 32767.f, CL_USHRT_MAX );
|
|
break;
|
|
}
|
|
case CL_UNSIGNED_INT32:
|
|
{
|
|
cl_uint *ptr = (cl_uint *)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
ptr[ i ] = (cl_uint)CONVERT_UINT( srcVector[ i ], MAKE_HEX_FLOAT( 0x1.fffffep31f, 0x1fffffe, 31-23), CL_UINT_MAX );
|
|
break;
|
|
}
|
|
#ifdef CL_SFIXED14_APPLE
|
|
case CL_SFIXED14_APPLE:
|
|
{
|
|
cl_ushort *ptr = (cl_ushort*)outData;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
{
|
|
cl_float f = fmaxf( srcVector[i], -1.0f );
|
|
f = fminf( f, 3.0f );
|
|
cl_int d = rintf(f * 0x1.0p14f);
|
|
d += 16384;
|
|
if( d > CL_USHRT_MAX )
|
|
d = CL_USHRT_MAX;
|
|
ptr[i] = d;
|
|
}
|
|
break;
|
|
}
|
|
#endif
|
|
default:
|
|
log_error( "INTERNAL ERROR: unknown format (%d)\n", imageFormat->image_channel_data_type);
|
|
exit(-1);
|
|
break;
|
|
}
|
|
}
|
|
|
|
void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results, float *errors )
|
|
{
|
|
size_t channelCount = get_format_channel_count( imageFormat );
|
|
switch( imageFormat->image_channel_data_type )
|
|
{
|
|
case CL_HALF_FLOAT:
|
|
{
|
|
const cl_ushort *ptr = (const cl_ushort *)results;
|
|
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = Ulp_Error_Half( ptr[i], srcVector[i] );
|
|
|
|
break;
|
|
}
|
|
|
|
case CL_FLOAT:
|
|
{
|
|
const cl_ushort *ptr = (const cl_ushort *)results;
|
|
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = Ulp_Error( ptr[i], srcVector[i] );
|
|
|
|
break;
|
|
}
|
|
|
|
case CL_SNORM_INT8:
|
|
{
|
|
const cl_char *ptr = (const cl_char *)results;
|
|
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( srcVector[ i ], -127.0f, 127.f );
|
|
|
|
break;
|
|
}
|
|
case CL_SNORM_INT16:
|
|
{
|
|
const cl_short *ptr = (const cl_short *)results;
|
|
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( srcVector[ i ], -32767.f, 32767.f );
|
|
|
|
break;
|
|
}
|
|
case CL_UNORM_INT8:
|
|
{
|
|
const cl_uchar *ptr = (const cl_uchar *)results;
|
|
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = ptr[i] - NORMALIZE_UNROUNDED( srcVector[ i ], 255.f );
|
|
|
|
break;
|
|
}
|
|
case CL_UNORM_INT16:
|
|
{
|
|
const cl_ushort *ptr = (const cl_ushort *)results;
|
|
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = ptr[i] - NORMALIZE_UNROUNDED( srcVector[ i ], 65535.f );
|
|
|
|
break;
|
|
}
|
|
case CL_UNORM_SHORT_555:
|
|
{
|
|
const cl_ushort *ptr = (const cl_ushort *)results;
|
|
|
|
errors[0] = ((ptr[0] >> 10) & 31) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 31.f );
|
|
errors[1] = ((ptr[0] >> 5) & 31) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 31.f );
|
|
errors[2] = ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 31.f );
|
|
|
|
break;
|
|
}
|
|
case CL_UNORM_SHORT_565:
|
|
{
|
|
const cl_ushort *ptr = (const cl_ushort *)results;
|
|
|
|
errors[0] = ((ptr[0] >> 11) & 31) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 31.f );
|
|
errors[1] = ((ptr[0] >> 5) & 63) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 63.f );
|
|
errors[2] = ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 31.f );
|
|
|
|
break;
|
|
}
|
|
case CL_UNORM_INT_101010:
|
|
{
|
|
const cl_uint *ptr = (const cl_uint *)results;
|
|
|
|
errors[0] = ((ptr[0] >> 20) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 1023.f );
|
|
errors[1] = ((ptr[0] >> 10) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 1023.f );
|
|
errors[2] = ((ptr[0] >> 0) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 1023.f );
|
|
|
|
break;
|
|
}
|
|
case CL_SIGNED_INT8:
|
|
{
|
|
const cl_char *ptr = (const cl_char *)results;
|
|
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[ i ] = ptr[i] - CONVERT_INT( srcVector[ i ], -127.0f, 127.f, 127 );
|
|
|
|
break;
|
|
}
|
|
case CL_SIGNED_INT16:
|
|
{
|
|
const cl_short *ptr = (const cl_short *)results;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = ptr[ i ] - CONVERT_INT( srcVector[ i ], -32767.f, 32767.f, 32767 );
|
|
break;
|
|
}
|
|
case CL_SIGNED_INT32:
|
|
{
|
|
const cl_int *ptr = (const cl_int *)results;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = (cl_float)((cl_long) ptr[ i ] - (cl_long) CONVERT_INT( srcVector[ i ], MAKE_HEX_FLOAT( -0x1.0p31f, -1, 31), MAKE_HEX_FLOAT( 0x1.fffffep30f, 0x1fffffe, 30-23), CL_INT_MAX ));
|
|
break;
|
|
}
|
|
case CL_UNSIGNED_INT8:
|
|
{
|
|
const cl_uchar *ptr = (const cl_uchar *)results;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = (cl_int) ptr[ i ] - (cl_int) CONVERT_UINT( srcVector[ i ], 255.f, CL_UCHAR_MAX );
|
|
break;
|
|
}
|
|
case CL_UNSIGNED_INT16:
|
|
{
|
|
const cl_ushort *ptr = (const cl_ushort *)results;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = (cl_int) ptr[ i ] - (cl_int) CONVERT_UINT( srcVector[ i ], 32767.f, CL_USHRT_MAX );
|
|
break;
|
|
}
|
|
case CL_UNSIGNED_INT32:
|
|
{
|
|
const cl_uint *ptr = (const cl_uint *)results;
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = (cl_float)((cl_long) ptr[ i ] - (cl_long)CONVERT_UINT( srcVector[ i ], MAKE_HEX_FLOAT( 0x1.fffffep31f, 0x1fffffe, 31-23), CL_UINT_MAX ));
|
|
break;
|
|
}
|
|
#ifdef CL_SFIXED14_APPLE
|
|
case CL_SFIXED14_APPLE:
|
|
{
|
|
const cl_ushort *ptr = (const cl_ushort *)results;
|
|
|
|
for( unsigned int i = 0; i < channelCount; i++ )
|
|
errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( ((int) srcVector[ i ] - 16384), -16384.f, 49151.f );
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
default:
|
|
log_error( "INTERNAL ERROR: unknown format (%d)\n", imageFormat->image_channel_data_type);
|
|
exit(-1);
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
//
|
|
// Autodetect which rounding mode is used for image writes to CL_HALF_FLOAT
|
|
// This should be called lazily before attempting to verify image writes, otherwise an error will occur.
|
|
//
|
|
int DetectFloatToHalfRoundingMode( cl_command_queue q ) // Returns CL_SUCCESS on success
|
|
{
|
|
cl_int err = CL_SUCCESS;
|
|
|
|
if( gFloatToHalfRoundingMode == kDefaultRoundingMode )
|
|
{
|
|
// Some numbers near 0.5f, that we look at to see how the values are rounded.
|
|
static const cl_uint inData[4*4] = { 0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U, 0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
|
|
0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U, 0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U };
|
|
static const size_t count = sizeof( inData ) / (4*sizeof( inData[0] ));
|
|
const float *inp = (const float*) inData;
|
|
cl_context context = NULL;
|
|
|
|
// Create an input buffer
|
|
err = clGetCommandQueueInfo( q, CL_QUEUE_CONTEXT, sizeof(context), &context, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not get context from command queue in DetectFloatToHalfRoundingMode (%d)", err );
|
|
return err;
|
|
}
|
|
|
|
cl_mem inBuf = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, sizeof( inData ), (void*) inData, &err );
|
|
if( NULL == inBuf || err )
|
|
{
|
|
log_error( "Error: could not create input buffer in DetectFloatToHalfRoundingMode (err: %d)", err );
|
|
return err;
|
|
}
|
|
|
|
// Create a small output image
|
|
cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT };
|
|
cl_mem outImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &fmt, count, 1, 0, NULL, &err );
|
|
if( NULL == outImage || err )
|
|
{
|
|
log_error( "Error: could not create half float out image in DetectFloatToHalfRoundingMode (err: %d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
return err;
|
|
}
|
|
|
|
// Create our program, and a kernel
|
|
const char *kernel[1] = {
|
|
"kernel void detect_round( global float4 *in, write_only image2d_t out )\n"
|
|
"{\n"
|
|
" write_imagef( out, (int2)(get_global_id(0),0), in[get_global_id(0)] );\n"
|
|
"}\n" };
|
|
cl_program program = clCreateProgramWithSource( context, 1, kernel, NULL, &err );
|
|
if( NULL == program || err )
|
|
{
|
|
log_error( "Error: could not create program in DetectFloatToHalfRoundingMode (err: %d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
return err;
|
|
}
|
|
|
|
cl_device_id device = NULL;
|
|
err = clGetCommandQueueInfo( q, CL_QUEUE_DEVICE, sizeof(device), &device, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not get device from command queue in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
return err;
|
|
}
|
|
|
|
err = clBuildProgram( program, 1, &device, "", NULL, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not build program in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
return err;
|
|
}
|
|
|
|
cl_kernel k = clCreateKernel( program, "detect_round", &err );
|
|
if( NULL == k || err )
|
|
{
|
|
log_error( "Error: could not create kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
return err;
|
|
}
|
|
|
|
err = clSetKernelArg( k, 0, sizeof( cl_mem ), &inBuf );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not set argument 0 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
err = clSetKernelArg( k, 1, sizeof( cl_mem ), &outImage );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not set argument 1 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// Run the kernel
|
|
size_t global_work_size = count;
|
|
err = clEnqueueNDRangeKernel( q, k, 1, NULL, &global_work_size, NULL, 0, NULL, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not enqueue kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// read the results
|
|
cl_ushort outBuf[count*4];
|
|
memset( outBuf, -1, sizeof( outBuf ) );
|
|
size_t origin[3] = {0,0,0};
|
|
size_t region[3] = {count,1,1};
|
|
err = clEnqueueReadImage( q, outImage, CL_TRUE, origin, region, 0, 0, outBuf, 0, NULL, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not read output image in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// Generate our list of reference results
|
|
cl_ushort rte_ref[count*4];
|
|
cl_ushort rtz_ref[count*4];
|
|
for( size_t i = 0; i < 4 * count; i++ )
|
|
{
|
|
rte_ref[i] = float2half_rte( inp[i] );
|
|
rtz_ref[i] = float2half_rtz( inp[i] );
|
|
}
|
|
|
|
// Verify that we got something in either rtz or rte mode
|
|
if( 0 == memcmp( rte_ref, outBuf, sizeof( rte_ref )) )
|
|
{
|
|
log_info( "Autodetected float->half rounding mode to be rte\n" );
|
|
gFloatToHalfRoundingMode = kRoundToNearestEven;
|
|
}
|
|
else if ( 0 == memcmp( rtz_ref, outBuf, sizeof( rtz_ref )) )
|
|
{
|
|
log_info( "Autodetected float->half rounding mode to be rtz\n" );
|
|
gFloatToHalfRoundingMode = kRoundTowardZero;
|
|
}
|
|
else
|
|
{
|
|
log_error( "ERROR: float to half conversions proceed with invalid rounding mode!\n" );
|
|
log_info( "\nfor:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {%a, %a, %a, %a},", inp[4*i], inp[4*i+1], inp[4*i+2], inp[4*i+3] );
|
|
log_info( "\ngot:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", outBuf[4*i], outBuf[4*i+1], outBuf[4*i+2], outBuf[4*i+3] );
|
|
log_info( "\nrte:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rte_ref[4*i], rte_ref[4*i+1], rte_ref[4*i+2], rte_ref[4*i+3] );
|
|
log_info( "\nrtz:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rtz_ref[4*i], rtz_ref[4*i+1], rtz_ref[4*i+2], rtz_ref[4*i+3] );
|
|
log_info( "\n" );
|
|
err = -1;
|
|
gFloatToHalfRoundingMode = kRoundingModeCount; // illegal value
|
|
}
|
|
|
|
// clean up
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// Make sure that the rounding mode was successfully detected, if we checked earlier
|
|
if( gFloatToHalfRoundingMode != kRoundToNearestEven && gFloatToHalfRoundingMode != kRoundTowardZero)
|
|
return -2;
|
|
|
|
return err;
|
|
}
|
|
|
|
// Allocates and fills a buffer of random source data for an image test.
//
//   dataType  - element type of the generated data (kFloat, kInt, kUInt or
//               kUnsignedInt).
//   imageInfo - supplies width/height/depth/arraySize (depth and arraySize of
//               0 count as 1) and the target format, which steers the value
//               ranges that are generated.
//   P         - receives the allocation through P.reset().
//   d         - random number generator state.
//
// The buffer holds 4 elements of dataType per pixel. Returns the start of the
// data, or NULL when the allocation fails or dataType is unsupported.
char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d )
{
    size_t numPixels = imageInfo->width * imageInfo->height
                        * (imageInfo->depth ? imageInfo->depth : 1)
                        * (imageInfo->arraySize ? imageInfo->arraySize : 1);
    size_t allocSize = numPixels * 4 * get_explicit_type_size( dataType );

#if defined( __APPLE__ )
    char *data = NULL;
    if (gDeviceType == CL_DEVICE_TYPE_CPU) {
        // Place the data so that it ends exactly at an inaccessible page, with
        // another inaccessible page in front of the mapping.
        size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192;

        void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
        if( map == MAP_FAILED )
        {
            log_error( "ERROR: Unable to map %lu bytes of image data\n", (unsigned long)mapSize );
            return NULL;
        }
        intptr_t data_end = (intptr_t)map + mapSize - 4096;
        data = (char *)(data_end - (intptr_t)allocSize);

        mprotect(map, 4096, PROT_NONE);
        mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
        P.reset(data, map, mapSize);
    } else {
        data = (char *)malloc(allocSize);
        P.reset(data);
    }
#else
    char *data = (char *)malloc(allocSize);
    P.reset(data);
#endif

    // Do not write through a failed allocation.
    if( data == NULL )
    {
        log_error( "ERROR: Unable to allocate %lu bytes of image data\n", (unsigned long)allocSize );
        return NULL;
    }

    switch( dataType )
    {
        case kFloat:
        {
            float *inputValues = (float *)data;
            switch (imageInfo->format->image_channel_data_type)
            {
                case CL_HALF_FLOAT:
                {
                    // Generate data that is (mostly) inside the range of a half float
                    // const float HALF_MIN = 5.96046448e-08f;
                    const float HALF_MAX = 65504.0f;

                    size_t i = 0;
                    inputValues[ i++ ] = 0.f;
                    inputValues[ i++ ] = 1.f;
                    inputValues[ i++ ] = -1.f;
                    inputValues[ i++ ] = 2.f;
                    for( ; i < numPixels * 4; i++ )
                        inputValues[ i ] = get_random_float( -HALF_MAX - 2.f, HALF_MAX + 2.f, d );
                }
                break;
#ifdef CL_SFIXED14_APPLE
                case CL_SFIXED14_APPLE:
                {
                    size_t i = 0;
                    if( numPixels * 4 >= 8 )
                    {
                        inputValues[ i++ ] = INFINITY;
                        inputValues[ i++ ] = 0x1.0p14f;
                        inputValues[ i++ ] = 0x1.0p31f;
                        inputValues[ i++ ] = 0x1.0p32f;
                        inputValues[ i++ ] = -INFINITY;
                        inputValues[ i++ ] = -0x1.0p14f;
                        inputValues[ i++ ] = -0x1.0p31f;
                        inputValues[ i++ ] = -0x1.1p31f;
                    }
                    for( ; i < numPixels * 4; i++ )
                        inputValues[ i ] = get_random_float( -1.1f, 3.1f, d );
                }
                break;
#endif
                case CL_FLOAT:
                {
                    size_t i = 0;
                    inputValues[ i++ ] = INFINITY;
                    inputValues[ i++ ] = -INFINITY;
                    inputValues[ i++ ] = 0.0f;
                    inputValues[ i++ ] = 0.0f;
                    // The remainder is filled with raw random 32-bit patterns.
                    cl_uint *p = (cl_uint *)data;
                    for( ; i < numPixels * 4; i++ )
                        p[ i ] = genrand_int32(d);
                }
                break;

                default:
                {
                    // Seed a fixed set of boundary values when there is room,
                    // then fill the rest with values slightly wider than the
                    // normalized range.
                    size_t i = 0;
                    if( numPixels * 4 >= 36 )
                    {
                        inputValues[ i++ ] = 0.0f;
                        inputValues[ i++ ] = 0.5f;
                        inputValues[ i++ ] = 31.5f;
                        inputValues[ i++ ] = 32.0f;
                        inputValues[ i++ ] = 127.5f;
                        inputValues[ i++ ] = 128.0f;
                        inputValues[ i++ ] = 255.5f;
                        inputValues[ i++ ] = 256.0f;
                        inputValues[ i++ ] = 1023.5f;
                        inputValues[ i++ ] = 1024.0f;
                        inputValues[ i++ ] = 32767.5f;
                        inputValues[ i++ ] = 32768.0f;
                        inputValues[ i++ ] = 65535.5f;
                        inputValues[ i++ ] = 65536.0f;
                        inputValues[ i++ ] = 2147483648.0f;
                        inputValues[ i++ ] = 4294967296.0f;
                        inputValues[ i++ ] = MAKE_HEX_FLOAT( 0x1.0p63f, 1, 63 );
                        inputValues[ i++ ] = MAKE_HEX_FLOAT( 0x1.0p64f, 1, 64 );
                        inputValues[ i++ ] = -0.0f;
                        inputValues[ i++ ] = -0.5f;
                        inputValues[ i++ ] = -31.5f;
                        inputValues[ i++ ] = -32.0f;
                        inputValues[ i++ ] = -127.5f;
                        inputValues[ i++ ] = -128.0f;
                        inputValues[ i++ ] = -255.5f;
                        inputValues[ i++ ] = -256.0f;
                        inputValues[ i++ ] = -1023.5f;
                        inputValues[ i++ ] = -1024.0f;
                        inputValues[ i++ ] = -32767.5f;
                        inputValues[ i++ ] = -32768.0f;
                        inputValues[ i++ ] = -65535.5f;
                        inputValues[ i++ ] = -65536.0f;
                        inputValues[ i++ ] = -2147483648.0f;
                        inputValues[ i++ ] = -4294967296.0f;
                        inputValues[ i++ ] = -MAKE_HEX_FLOAT( 0x1.0p63f, 1, 63 );
                        inputValues[ i++ ] = -MAKE_HEX_FLOAT( 0x1.0p64f, 1, 64 );
                    }
                    if( is_format_signed(imageInfo->format) )
                    {
                        for( ; i < numPixels * 4; i++ )
                            inputValues[ i ] = get_random_float( -1.1f, 1.1f, d );
                    }
                    else
                    {
                        for( ; i < numPixels * 4; i++ )
                            inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
                    }
                    break;
                }
            }
            // Without this break control fell through into case kInt and the
            // float data generated above was overwritten with random ints.
            break;
        }

        case kInt:
        {
            int *imageData = (int *)data;

            // We want to generate ints (mostly) in range of the target format
            int formatMin = get_format_min_int( imageInfo->format );
            size_t formatMax = get_format_max_int( imageInfo->format );
            if( formatMin == 0 )
            {
                // Unsigned values, but we are only an int, so cap the actual max at the max of signed ints
                if( formatMax > 2147483647L )
                    formatMax = 2147483647L;
            }
            // If the final format is small enough, give us a bit of room for out-of-range values to test
            if( formatMax < 2147483647L )
                formatMax += 2;
            if( formatMin > -2147483648LL )
                formatMin -= 2;

            // Now gen
            for( size_t i = 0; i < numPixels * 4; i++ )
            {
                imageData[ i ] = random_in_range( formatMin, (int)formatMax, d );
            }
            break;
        }

        case kUInt:
        case kUnsignedInt:
        {
            unsigned int *imageData = (unsigned int *)data;

            // We want to generate ints (mostly) in range of the target format
            int formatMin = get_format_min_int( imageInfo->format );
            size_t formatMax = get_format_max_int( imageInfo->format );
            if( formatMin < 0 )
                formatMin = 0;
            // If the final format is small enough, give us a bit of room for out-of-range values to test
            if( formatMax < 4294967295LL )
                formatMax += 2;

            // Now gen
            // NOTE(review): formatMax is narrowed to int below, so a maximum
            // above INT_MAX does not survive the cast -- confirm what
            // random_in_range() expects for 32-bit unsigned formats.
            for( size_t i = 0; i < numPixels * 4; i++ )
            {
                imageData[ i ] = random_in_range( formatMin, (int)formatMax, d );
            }
            break;
        }
        default:
            // Unsupported source format.
            // The buffer came from malloc()/mmap() and has already been handed
            // to P, so it must not be passed to delete[] here; it is left for
            // P to dispose of (presumably when P is reset or destroyed).
            return NULL;
    }

    return data;
}
|
|
|
|
/*
|
|
deprecated
|
|
bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue )
|
|
{
|
|
int v = (int)value;
|
|
|
|
switch(imageSampler->addressing_mode)
|
|
{
|
|
case CL_ADDRESS_REPEAT:
|
|
outValue = v;
|
|
while( v < 0 )
|
|
v += (int)max;
|
|
while( v >= (int)max )
|
|
v -= (int)max;
|
|
if( v != outValue )
|
|
{
|
|
outValue = v;
|
|
return true;
|
|
}
|
|
return false;
|
|
|
|
case CL_ADDRESS_MIRRORED_REPEAT:
|
|
log_info( "ERROR: unimplemented for CL_ADDRESS_MIRRORED_REPEAT. Do we ever use this?\n" );
|
|
exit(-1);
|
|
|
|
default:
|
|
if( v < 0 )
|
|
{
|
|
outValue = 0;
|
|
return true;
|
|
}
|
|
if( v >= (int)max )
|
|
{
|
|
outValue = (int)max - 1;
|
|
return true;
|
|
}
|
|
outValue = v;
|
|
return false;
|
|
}
|
|
|
|
}
|
|
*/
|
|
|
|
// Writes into outLine one line of OpenCL C that declares a constant sampler
// named imageSampler with the addressing mode, filter mode and coordinate
// normalization described by imageSampler. An unrecognized addressing mode
// logs an error and aborts. outLine must be large enough for the formatted
// line; no length is checked here.
void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine )
{
    const char *addressMode = NULL;

    switch( imageSampler->addressing_mode )
    {
        case CL_ADDRESS_CLAMP:
            addressMode = "CLK_ADDRESS_CLAMP";
            break;
        case CL_ADDRESS_CLAMP_TO_EDGE:
            addressMode = "CLK_ADDRESS_CLAMP_TO_EDGE";
            break;
        case CL_ADDRESS_REPEAT:
            addressMode = "CLK_ADDRESS_REPEAT";
            break;
        case CL_ADDRESS_MIRRORED_REPEAT:
            addressMode = "CLK_ADDRESS_MIRRORED_REPEAT";
            break;
        case CL_ADDRESS_NONE:
            addressMode = "CLK_ADDRESS_NONE";
            break;
        default:
            log_error( "**Error: Unknown addressing mode! Aborting...\n" );
            abort();
    }

    // Anything that is not linear filtering is emitted as nearest.
    const char *filterMode = ( imageSampler->filter_mode == CL_FILTER_LINEAR )
                                 ? "CLK_FILTER_LINEAR"
                                 : "CLK_FILTER_NEAREST";

    const char *normalized = imageSampler->normalized_coords
                                 ? "CLK_NORMALIZED_COORDS_TRUE"
                                 : "CLK_NORMALIZED_COORDS_FALSE";

    sprintf( outLine, " const sampler_t imageSampler = %s | %s | %s;\n", addressMode, filterMode, normalized );
}
|
|
|
|
// Copies a rectangular region of pixels between two host-side image buffers.
//
//   srcImageInfo / dstImageInfo - supply the row and slice pitches of each
//                                 buffer; the pixel size is taken from the
//                                 source format only.
//   imageValues / destImageValues - base addresses of source and destination.
//   sourcePos / destPos         - (x, y, z) of the region's first pixel in
//                                 each image.
//   regionSize                  - (width, height, depth) in pixels; a depth
//                                 of 0 is treated as 1.
void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
                      const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] )
{
    const size_t pixelSize = get_pixel_size( srcImageInfo->format );
    const size_t rowBytes = pixelSize * regionSize[ 0 ];
    const size_t sliceCount = regionSize[ 2 ] > 0 ? regionSize[ 2 ] : 1;

    // Address of the first pixel of the region in each image.
    char *srcSlice = (char *)imageValues
                     + sourcePos[ 2 ] * srcImageInfo->slicePitch
                     + sourcePos[ 1 ] * srcImageInfo->rowPitch
                     + pixelSize * sourcePos[ 0 ];
    char *dstSlice = (char *)destImageValues
                     + destPos[ 2 ] * dstImageInfo->slicePitch
                     + destPos[ 1 ] * dstImageInfo->rowPitch
                     + pixelSize * destPos[ 0 ];

    for( size_t slice = 0; slice < sliceCount; slice++ )
    {
        // One memcpy per row, each image stepping by its own row pitch.
        for( size_t row = 0; row < regionSize[ 1 ]; row++ )
        {
            memcpy( dstSlice + row * dstImageInfo->rowPitch,
                    srcSlice + row * srcImageInfo->rowPitch,
                    rowBytes );
        }

        srcSlice += srcImageInfo->slicePitch;
        dstSlice += dstImageInfo->slicePitch;
    }
}
|
|
|
|
float random_float(float low, float high, MTdata d)
|
|
{
|
|
float t = (float) genrand_real1(d);
|
|
return (1.0f - t) * low + t * high;
|
|
}
|
|
|
|
// Wraps a caller-provided coordinate array. `coords` is interpreted as
// cl_float elements when useFloats is true and as cl_int elements otherwise;
// exactly one of the two member pointers is non-NULL afterwards. vecSize is
// the number of elements stored per coordinate.
CoordWalker::CoordWalker( void * coords, bool useFloats, size_t vecSize )
{
    mFloatCoords = useFloats ? (cl_float *)coords : NULL;
    mIntCoords = useFloats ? NULL : (cl_int *)coords;
    mVecSize = vecSize;
}
|
|
|
|
// Nothing to release: the destructor does not free the coordinate array.
CoordWalker::~CoordWalker() {}
|
|
|
|
// Returns element `el` of coordinate `idx` as a float, converting from the
// integer array when that is the one in use.
cl_float CoordWalker::Get( size_t idx, size_t el )
{
    const size_t offset = idx * mVecSize + el;

    if( mIntCoords == NULL )
        return mFloatCoords[ offset ];

    return (cl_float)mIntCoords[ offset ];
}
|
|
|
|
|
|
// Logs a one-line description of an image read test configuration: channel
// order, channel type, channel count, filter mode, addressing mode and
// coordinate normalization. When t is non-zero a transposition label is
// appended ("TRANSPOSED" for t == 1, "NON-TRANSPOSED" otherwise). The line
// goes to log_error when err is true and to log_info otherwise.
void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err, int t )
{
    const char *addressMode = NULL;
    switch( sampler->addressing_mode )
    {
        case CL_ADDRESS_CLAMP:
            addressMode = "CL_ADDRESS_CLAMP";
            break;
        case CL_ADDRESS_CLAMP_TO_EDGE:
            addressMode = "CL_ADDRESS_CLAMP_TO_EDGE";
            break;
        case CL_ADDRESS_REPEAT:
            addressMode = "CL_ADDRESS_REPEAT";
            break;
        case CL_ADDRESS_MIRRORED_REPEAT:
            addressMode = "CL_ADDRESS_MIRRORED_REPEAT";
            break;
        default:
            // Every remaining value is reported as CL_ADDRESS_NONE.
            addressMode = "CL_ADDRESS_NONE";
            break;
    }

    const char *orderName = GetChannelOrderName( format->image_channel_order );
    const char *typeName = GetChannelTypeName( format->image_channel_data_type );
    const int channels = (int)get_format_channel_count( format );
    const char *filterName = sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR";
    const char *coordName = sampler->normalized_coords ? "NORMALIZED" : "UNNORMALIZED";

    if( t == 0 )
    {
        if( err )
            log_error( "[%-7s %-24s %d] - %s - %s - %s\n", orderName, typeName, channels, filterName, addressMode, coordName );
        else
            log_info( "[%-7s %-24s %d] - %s - %s - %s\n", orderName, typeName, channels, filterName, addressMode, coordName );
        return;
    }

    const char *transposeName = t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED";
    if( err )
        log_error( "[%-7s %-24s %d] - %s - %s - %s - %s\n", orderName, typeName, channels, filterName, addressMode, coordName, transposeName );
    else
        log_info( "[%-7s %-24s %d] - %s - %s - %s - %s\n", orderName, typeName, channels, filterName, addressMode, coordName, transposeName );
}
|
|
|
|
// Logs "[order type channel-count]" for an image write test; the line goes to
// log_error when err is true and to log_info otherwise.
void print_write_header( cl_image_format *format, bool err = false)
{
    const char *orderName = GetChannelOrderName( format->image_channel_order );
    const char *typeName = GetChannelTypeName( format->image_channel_data_type );
    const int channels = (int)get_format_channel_count( format );

    if( !err )
    {
        log_info( "[%-7s %-24s %d]\n", orderName, typeName, channels );
        return;
    }

    log_error( "[%-7s %-24s %d]\n", orderName, typeName, channels );
}
|
|
|
|
|
|
// Prints the bracketed description of an image format: channel order name,
// channel data type name and channel count, followed by a newline.
//
// format: image format to describe.
// err:    selects the logging call; true uses log_error, false uses log_info.
void print_header( cl_image_format *format, bool err = false )
{
    // Gather the three printed fields once so both branches share them.
    const char *order = GetChannelOrderName( format->image_channel_order );
    const char *dataType = GetChannelTypeName( format->image_channel_data_type );
    const int numChannels = (int)get_format_channel_count( format );

    if( err )
        log_error( "[%-7s %-24s %d]\n", order, dataType, numChannels );
    else
        log_info( "[%-7s %-24s %d]\n", order, dataType, numChannels );
}
|
|
|
|
// Linear search for an image format in a list.
//
// formatList:   array of formats to search.
// numFormats:   number of entries in formatList.
// formatToFind: format being looked for.
//
// Returns true when some entry matches formatToFind in both channel order and
// channel data type, false otherwise (including when numFormats is 0).
bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind )
{
    const cl_image_format *candidate = formatList;

    for( unsigned int remaining = numFormats; remaining > 0; --remaining, ++candidate )
    {
        // Skip entries that already differ on channel order.
        if( candidate->image_channel_order != formatToFind->image_channel_order )
            continue;

        if( candidate->image_channel_data_type == formatToFind->image_channel_data_type )
            return true;
    }

    return false;
}
|
|
|
|
// Checks that every format in a fixed list of required image formats appears
// in the supplied list of supported formats.
//
// formatList: array of formats reported as supported.
// numFormats: number of entries in formatList.
// flags:      selects the required list; CL_MEM_READ_ONLY uses the read list,
//             any other value uses the write list.
//
// For each required format that is missing, an error line is logged, the
// format is printed through print_header, and both gTestCount and
// gTestFailure are incremented. Returns true only when nothing is missing.
bool check_minimum_supported( cl_image_format *formatList, unsigned int numFormats, cl_mem_flags flags )
{
    // Required formats when flags == CL_MEM_READ_ONLY.
    cl_image_format readFormatsToSupport[] = {
        { CL_RGBA, CL_UNORM_INT8 },
        { CL_RGBA, CL_UNORM_INT16 },
        { CL_RGBA, CL_SIGNED_INT8 },
        { CL_RGBA, CL_SIGNED_INT16 },
        { CL_RGBA, CL_SIGNED_INT32 },
        { CL_RGBA, CL_UNSIGNED_INT8 },
        { CL_RGBA, CL_UNSIGNED_INT16 },
        { CL_RGBA, CL_UNSIGNED_INT32 },
        { CL_RGBA, CL_HALF_FLOAT },
        { CL_RGBA, CL_FLOAT },
        { CL_BGRA, CL_UNORM_INT8}
    };

    // Required formats for every other flags value.
    cl_image_format writeFormatsToSupport[] = {
        { CL_RGBA, CL_UNORM_INT8 },
        { CL_RGBA, CL_UNORM_INT16 },
        { CL_RGBA, CL_SIGNED_INT8 },
        { CL_RGBA, CL_SIGNED_INT16 },
        { CL_RGBA, CL_SIGNED_INT32 },
        { CL_RGBA, CL_UNSIGNED_INT8 },
        { CL_RGBA, CL_UNSIGNED_INT16 },
        { CL_RGBA, CL_UNSIGNED_INT32 },
        { CL_RGBA, CL_HALF_FLOAT },
        { CL_RGBA, CL_FLOAT },
        { CL_BGRA, CL_UNORM_INT8}
    };

    const bool useReadList = ( flags == CL_MEM_READ_ONLY );

    cl_image_format *required = useReadList ? readFormatsToSupport : writeFormatsToSupport;
    const unsigned int requiredCount = useReadList
        ? (unsigned int)( sizeof( readFormatsToSupport ) / sizeof( readFormatsToSupport[ 0 ] ) )
        : (unsigned int)( sizeof( writeFormatsToSupport ) / sizeof( writeFormatsToSupport[ 0 ] ) );

    unsigned int missingCount = 0;

    for( cl_image_format *wanted = required; wanted != required + requiredCount; ++wanted )
    {
        if( find_format( formatList, numFormats, wanted ) )
            continue;

        // Each missing format is reported and counted as one failed test.
        log_error( "ERROR: Format required by OpenCL 1.0 is not supported: " );
        print_header( wanted, true );
        gTestCount++;
        gTestFailure++;
        missingCount++;
    }

    return missingCount == 0;
}
|
|
|
|
|