Initial open source release of OpenCL 2.0 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 18:50:35 +05:30
parent 6911ba5116
commit 3a440d17c8
883 changed files with 318212 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
# Build description for the kernel read/write image conformance test.
# MODULE_NAME names the module; the source list variable below is named after it
# (IMAGE_STREAMS_SOURCES).
set(MODULE_NAME IMAGE_STREAMS)
# Test-specific sources first, followed by the shared harness sources that are
# compiled directly into this module.
set(${MODULE_NAME}_SOURCES
main.cpp
test_iterations.cpp
test_loops.cpp
test_read_1D.cpp
test_read_1D_array.cpp
test_read_2D_array.cpp
test_read_3D.cpp
test_write_image.cpp
test_write_1D.cpp
test_write_1D_array.cpp
test_write_2D_array.cpp
../../../test_common/harness/errorHelpers.c
../../../test_common/harness/threadTesting.c
../../../test_common/harness/kernelHelpers.c
../../../test_common/harness/imageHelpers.cpp
../../../test_common/harness/mt19937.c
../../../test_common/harness/conversions.c
../../../test_common/harness/testHarness.c
../../../test_common/harness/typeWrappers.cpp
../../../test_common/harness/msvc9.c
)
# Shared build logic; presumably consumes MODULE_NAME and ${MODULE_NAME}_SOURCES
# to define the target -- confirm against CMakeCommon.txt.
include(../../CMakeCommon.txt)

View File

@@ -0,0 +1,19 @@
# Boost.Build description for the kernel read/write image conformance test.
project
    : requirements
#      <toolset>gcc:<cflags>-xc++
#      <toolset>msvc:<cflags>"/TP"
    ;

# The source list mirrors the test sources built by the CMake and Makefile
# descriptions of this test. test_iterations.cpp declares the 1D / 1D-array /
# 2D-array entry points as extern, so the files providing them must be part of
# the executable or the link fails.
exe test_image_streams
    : main.cpp
      test_iterations.cpp
      test_loops.cpp
      test_read_1D.cpp
      test_read_1D_array.cpp
      test_read_2D_array.cpp
      test_read_3D.cpp
      test_write_image.cpp
      test_write_1D.cpp
      test_write_1D_array.cpp
      test_write_2D_array.cpp
      test_write_3D.cpp
    ;

# Install the executable into a per-variant distribution directory.
install dist
    : test_image_streams
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/images/kernel_read_write
      <variant>release:<location>$(DIST)/release/tests/test_conformance/images/kernel_read_write
    ;

View File

@@ -0,0 +1,56 @@
# Makefile for the kernel read/write image conformance test (macOS frameworks).

# Optional ATF support: define BUILD_WITH_ATF to link the framework and
# compile with -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources followed by the shared harness sources compiled into the binary.
SRCS = main.cpp \
	test_iterations.cpp \
	test_loops.cpp \
	test_write_image.cpp \
	test_read_1D.cpp \
	test_read_3D.cpp \
	test_read_1D_array.cpp \
	test_read_2D_array.cpp \
	test_write_1D.cpp \
	test_write_3D.cpp \
	test_write_1D_array.cpp \
	test_write_2D_array.cpp \
	../../../test_common/harness/errorHelpers.c \
	../../../test_common/harness/threadTesting.c \
	../../../test_common/harness/kernelHelpers.c \
	../../../test_common/harness/imageHelpers.cpp \
	../../../test_common/harness/conversions.c \
	../../../test_common/harness/testHarness.c \
	../../../test_common/harness/mt19937.c \
	../../../test_common/harness/typeWrappers.cpp

DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK =
HEADERS =
TARGET = test_image_streams
# NOTE(review): the harness sources above live under ../../../test_common/harness
# while this include path uses ../../ -- confirm which one is intended.
INCLUDE = -I../../test_common/harness

COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
# Both .c and .cpp sources are built with the C++ driver.
CC = c++
CXX = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every source (.c and .cpp) to its object file.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# all/clean name actions, not files: declare them phony so a stray file called
# "all" or "clean" cannot mask the rule.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for targets that have no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,651 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#if !defined(_WIN32)
#include <unistd.h>
#include <sys/time.h>
#endif
#include "../testBase.h"
#include "../../../test_common/harness/fpcontrol.h"
#include <vector>
#if defined(__PPC__)
// Global variable used to hold the FPU control register state. The FPSCR register cannot
// be used because not all Power implementations retain or observe the NI (non-IEEE
// mode) bit.
__thread fpu_control_t fpu_control = 0;
#endif

// Boolean test switches; all start disabled and are enabled from the command line in main().
bool gDebugTrace = false;
bool gExtraValidateInfo = false;
bool gDisableOffsets = false;
bool gTestSmallImages = false;
bool gTestMaxImages = false;
bool gTestRounding = false;
bool gTestImage2DFromBuffer = false;
bool gTestMipmaps = false;
bool gUseKernelSamplers = false;
bool gEnablePitch = false;

// Optional restrictions selected on the command line. The (type)-1 value is the
// initial "nothing selected" state.
cl_filter_mode gFilterModeToUse = (cl_filter_mode)-1;
cl_addressing_mode gAddressModeToUse = (cl_addressing_mode)-1;
cl_channel_type gChannelTypeToUse = (cl_channel_type)-1;
cl_channel_order gChannelOrderToUse = (cl_channel_order)-1;

// Initial value is 7; main() replaces it with true/false when NORMALIZED or
// UNNORMALIZED is requested.
int gNormalizedModeToUse = 7;

// Default is CL_MEM_USE_HOST_PTR for the test
cl_mem_flags gMemFlagsToUse = CL_MEM_USE_HOST_PTR;

// Bit masks: data types to test, and the kind of test currently being run.
int gTypesToTest = 0;
int gtestTypesToRun = 0;

cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;

// Context and queue shared by the tests; created and released in main().
cl_command_queue queue;
cl_context context;

#define MAX_ALLOWED_STD_DEVIATION_IN_MB 8.0
/** Prints the command-line help text through log_info.
 *
 *  @param execName Path of the executable (normally argv[0]); only the part after the
 *                  last '/' is shown in the usage line.
 *
 *  The listed options are the ones parsed in main(), including read_write, randomize,
 *  CL_ADDRESS_NONE and the device type selectors.
 */
void printUsage( const char *execName )
{
    // Show only the base name of the executable
    const char *p = strrchr( execName, '/' );
    if( p != NULL )
        execName = p + 1;

    log_info( "Usage: %s [read] [write] [read_write] [CL_FILTER_LINEAR|CL_FILTER_NEAREST] [no_offsets] [debug_trace] [small_images]\n", execName );
    log_info( "Where:\n" );
    log_info( "\n" );
    log_info( "\tThe following flags specify what kinds of operations to test. They can be combined; if none are specified, all are tested:\n" );
    log_info( "\t\tread - Tests reading from an image\n" );
    log_info( "\t\twrite - Tests writing to an image (can be specified with read to run both; default is both)\n" );
    log_info( "\t\tread_write - Tests reading from and writing to read_write images (skipped when test_mipmaps is used)\n" );
    log_info( "\n" );
    log_info( "\tThe following flags specify the types to test. They can be combined; if none are specified, all are tested:\n" );
    log_info( "\t\tint - Test integer I/O (read_imagei, write_imagei)\n" );
    log_info( "\t\tuint - Test unsigned integer I/O (read_imageui, write_imageui)\n" );
    log_info( "\t\tfloat - Test float I/O (read_imagef, write_imagef)\n" );
    log_info( "\n" );
    log_info( "\tcpu|gpu|accelerator|CL_DEVICE_TYPE_* - Selects the device type to test\n" );
    log_info( "\n" );
    log_info( "\tCL_FILTER_LINEAR - Only tests formats with CL_FILTER_LINEAR filtering\n" );
    log_info( "\tCL_FILTER_NEAREST - Only tests formats with CL_FILTER_NEAREST filtering\n" );
    log_info( "\n" );
    log_info( "\tNORMALIZED - Only tests formats with NORMALIZED coordinates\n" );
    log_info( "\tUNNORMALIZED - Only tests formats with UNNORMALIZED coordinates\n" );
    log_info( "\n" );
    log_info( "\tCL_ADDRESS_NONE - Only tests formats with CL_ADDRESS_NONE addressing\n" );
    log_info( "\tCL_ADDRESS_CLAMP - Only tests formats with CL_ADDRESS_CLAMP addressing\n" );
    log_info( "\tCL_ADDRESS_CLAMP_TO_EDGE - Only tests formats with CL_ADDRESS_CLAMP_TO_EDGE addressing\n" );
    log_info( "\tCL_ADDRESS_REPEAT - Only tests formats with CL_ADDRESS_REPEAT addressing\n" );
    log_info( "\tCL_ADDRESS_MIRRORED_REPEAT - Only tests formats with CL_ADDRESS_MIRRORED_REPEAT addressing\n" );
    log_info( "\n" );
    log_info( "You may also use appropriate CL_ channel type and ordering constants.\n" );
    log_info( "\n" );
    log_info( "\t1D - Only test 1D images\n" );
    log_info( "\t2D - Only test 2D images\n" );
    log_info( "\t3D - Only test 3D images\n" );
    log_info( "\t1Darray - Only test 1D image arrays\n" );
    log_info( "\t2Darray - Only test 2D image arrays\n" );
    log_info( "\n" );
    log_info( "\tlocal_samplers - Use samplers declared in the kernel functions instead of passed in as arguments\n" );
    log_info( "\n" );
    log_info( "\tThe following specify to use the specific flag to allocate images to use in the tests:\n" );
    log_info( "\t\tCL_MEM_COPY_HOST_PTR\n" );
    log_info( "\t\tCL_MEM_USE_HOST_PTR (default)\n" );
    log_info( "\t\tCL_MEM_ALLOC_HOST_PTR\n" );
    log_info( "\t\tNO_HOST_PTR - Specifies to use none of the above flags\n" );
    log_info( "\n" );
    log_info( "\tThe following modify the types of images tested:\n" );
    log_info( "\t\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" );
    log_info( "\t\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" );
    log_info( "\t\trounding - Runs every format through a single image filled with every possible value for that image format, to verify rounding works properly\n" );
    log_info( "\n" );
    log_info( "\tno_offsets - Disables offsets when testing reads (can be good for diagnosing address repeating/clamping problems)\n" );
    log_info( "\tdebug_trace - Enables additional debug info logging\n" );
    log_info( "\textra_validate - Enables additional validation failure debug information\n" );
    log_info( "\tuse_pitches - Enables row and slice pitches\n" );
    log_info( "\ttest_mipmaps - Enables mipmapped images\n");
    log_info( "\trandomize - Seeds the random number generator from the current time\n" );
}
extern int test_image_set( cl_device_id device, test_format_set_fn formatTestFn, cl_mem_object_type imageType );
/** read_write images only support sampler-less read buildt-ins which require special settings
* for some global parameters. This pair of functions temporarily overwrite those global parameters
* and then recover them after completing a read_write test.
*/
static void overwrite_global_params_for_read_write_test( bool *tTestMipmaps,
bool *tDisableOffsets,
bool *tNormalizedModeToUse,
cl_filter_mode *tFilterModeToUse)
{
log_info("Overwrite global settings for read_write image tests. The overwritten values:\n");
log_info("gTestMipmaps = false, gDisableOffsets = true, gNormalizedModeToUse = false, gFilterModeToUse = CL_FILTER_NEAREST\n" );
// mipmap images only support sampler read built-in while read_write images only support
// sampler-less read built-in. Hence we cannot test mipmap for read_write image.
*tTestMipmaps = gTestMipmaps;
gTestMipmaps = false;
// Read_write images are read by sampler-less read which does not handle out-of-bound read
// It's application responsibility to make sure that the read happens in-bound
// Therefore we should not enable offset in testing read_write images because it will cause out-of-bound
*tDisableOffsets = gDisableOffsets;
gDisableOffsets = true;
// The sampler-less read image functions behave exactly as the corresponding read image functions
*tNormalizedModeToUse = gNormalizedModeToUse;
gNormalizedModeToUse = false;
*tFilterModeToUse = gFilterModeToUse;
gFilterModeToUse = CL_FILTER_NEAREST;
}
/** Recover the global settings overwritten for read_write tests. This is necessary because
* there may be other tests (i.e. read or write) are called together with read_write test.
*/
static void recover_global_params_from_read_write_test(bool tTestMipmaps,
bool tDisableOffsets,
bool tNormalizedModeToUse,
cl_filter_mode tFilterModeToUse)
{
gTestMipmaps = tTestMipmaps;
gDisableOffsets = tDisableOffsets;
gNormalizedModeToUse = tNormalizedModeToUse;
gFilterModeToUse = tFilterModeToUse;
}
int main(int argc, const char *argv[])
{
cl_platform_id platform;
cl_device_id device;
cl_channel_type chanType;
cl_channel_order chanOrder;
char str[ 128 ];
int testTypesToRun = 0;
int testMethods = 0;
bool randomize = false;
bool tTestMipMaps = false;
bool tDisableOffsets = false;
bool tNormalizedModeToUse = false;
cl_filter_mode tFilterModeToUse = (cl_filter_mode)-1;
test_start();
//Check CL_DEVICE_TYPE environment variable
checkDeviceTypeOverride( &gDeviceType );
// Parse arguments
for( int i = 1; i < argc; i++ )
{
strncpy( str, argv[ i ], sizeof( str ) - 1 );
if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 )
gDeviceType = CL_DEVICE_TYPE_CPU;
else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 )
gDeviceType = CL_DEVICE_TYPE_GPU;
else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
gDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
gDeviceType = CL_DEVICE_TYPE_DEFAULT;
else if( strcmp( str, "debug_trace" ) == 0 )
gDebugTrace = true;
else if( strcmp( str, "CL_FILTER_NEAREST" ) == 0 || strcmp( str, "NEAREST" ) == 0 )
gFilterModeToUse = CL_FILTER_NEAREST;
else if( strcmp( str, "CL_FILTER_LINEAR" ) == 0 || strcmp( str, "LINEAR" ) == 0 )
gFilterModeToUse = CL_FILTER_LINEAR;
else if( strcmp( str, "CL_ADDRESS_NONE" ) == 0 )
gAddressModeToUse = CL_ADDRESS_NONE;
else if( strcmp( str, "CL_ADDRESS_CLAMP" ) == 0 )
gAddressModeToUse = CL_ADDRESS_CLAMP;
else if( strcmp( str, "CL_ADDRESS_CLAMP_TO_EDGE" ) == 0 )
gAddressModeToUse = CL_ADDRESS_CLAMP_TO_EDGE;
else if( strcmp( str, "CL_ADDRESS_REPEAT" ) == 0 )
gAddressModeToUse = CL_ADDRESS_REPEAT;
else if( strcmp( str, "CL_ADDRESS_MIRRORED_REPEAT" ) == 0 )
gAddressModeToUse = CL_ADDRESS_MIRRORED_REPEAT;
else if( strcmp( str, "NORMALIZED" ) == 0 )
gNormalizedModeToUse = true;
else if( strcmp( str, "UNNORMALIZED" ) == 0 )
gNormalizedModeToUse = false;
else if( strcmp( str, "no_offsets" ) == 0 )
gDisableOffsets = true;
else if( strcmp( str, "small_images" ) == 0 )
gTestSmallImages = true;
else if( strcmp( str, "max_images" ) == 0 )
gTestMaxImages = true;
else if( strcmp( str, "use_pitches" ) == 0 )
gEnablePitch = true;
else if( strcmp( str, "rounding" ) == 0 )
gTestRounding = true;
else if( strcmp( str, "extra_validate" ) == 0 )
gExtraValidateInfo = true;
else if( strcmp( str, "test_mipmaps" ) == 0 ) {
// 2.0 Spec does not allow using mem flags, unnormalized coordinates with mipmapped images
gTestMipmaps = true;
gMemFlagsToUse = 0;
gNormalizedModeToUse = true;
}
else if( strcmp( str, "read" ) == 0 )
testTypesToRun |= kReadTests;
else if( strcmp( str, "write" ) == 0 )
testTypesToRun |= kWriteTests;
else if( strcmp( str, "read_write" ) == 0 )
{
testTypesToRun |= kReadWriteTests;
}
else if( strcmp( str, "local_samplers" ) == 0 )
gUseKernelSamplers = true;
else if( strcmp( str, "int" ) == 0 )
gTypesToTest |= kTestInt;
else if( strcmp( str, "uint" ) == 0 )
gTypesToTest |= kTestUInt;
else if( strcmp( str, "float" ) == 0 )
gTypesToTest |= kTestFloat;
else if( strcmp( str, "randomize" ) == 0 )
randomize = true;
else if ( strcmp( str, "1D" ) == 0 )
testMethods |= k1D;
else if( strcmp( str, "2D" ) == 0 )
testMethods |= k2D;
else if( strcmp( str, "3D" ) == 0 )
testMethods |= k3D;
else if( strcmp( str, "1Darray" ) == 0 )
testMethods |= k1DArray;
else if( strcmp( str, "2Darray" ) == 0 )
testMethods |= k2DArray;
else if( strcmp( str, "CL_MEM_COPY_HOST_PTR" ) == 0 || strcmp( str, "COPY_HOST_PTR" ) == 0 )
gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
else if( strcmp( str, "CL_MEM_USE_HOST_PTR" ) == 0 || strcmp( str, "USE_HOST_PTR" ) == 0 )
gMemFlagsToUse = CL_MEM_USE_HOST_PTR;
else if( strcmp( str, "CL_MEM_ALLOC_HOST_PTR" ) == 0 || strcmp( str, "ALLOC_HOST_PTR" ) == 0 )
gMemFlagsToUse = CL_MEM_ALLOC_HOST_PTR;
else if( strcmp( str, "NO_HOST_PTR" ) == 0 )
gMemFlagsToUse = 0;
else if( strcmp( str, "help" ) == 0 || strcmp( str, "?" ) == 0 )
{
printUsage( argv[ 0 ] );
return -1;
}
else if( ( chanType = get_channel_type_from_name( str ) ) != (cl_channel_type)-1 )
gChannelTypeToUse = chanType;
else if( ( chanOrder = get_channel_order_from_name( str ) ) != (cl_channel_order)-1 )
gChannelOrderToUse = chanOrder;
else
{
log_error( "ERROR: Unknown argument %d: %s. Exiting....\n", i, str );
return -1;
}
}
if (testMethods == 0)
testMethods = k1D | k2D | k3D | k1DArray | k2DArray;
if( testTypesToRun == 0 )
testTypesToRun = kAllTests;
if( gTypesToTest == 0 )
gTypesToTest = kTestAllTypes;
#if defined( __APPLE__ )
#if defined( __i386__ ) || defined( __x86_64__ )
#define kHasSSE3 0x00000008
#define kHasSupplementalSSE3 0x00000100
#define kHasSSE4_1 0x00000400
#define kHasSSE4_2 0x00000800
/* check our environment for a hint to disable SSE variants */
{
const char *env = getenv( "CL_MAX_SSE" );
if( env )
{
extern int _cpu_capabilities;
int mask = 0;
if( 0 == strcmp( env, "SSE4.1" ) )
mask = kHasSSE4_2;
else if( 0 == strcmp( env, "SSSE3" ) )
mask = kHasSSE4_2 | kHasSSE4_1;
else if( 0 == strcmp( env, "SSE3" ) )
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
else if( 0 == strcmp( env, "SSE2" ) )
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
_cpu_capabilities &= ~mask;
}
}
#endif
#endif
// Seed the random # generators
if( randomize )
{
gRandomSeed = (cl_uint) time( NULL );
gReSeed = 1;
log_info( "Random seed: %u\n", gRandomSeed );
}
int error;
// Get our platform
error = clGetPlatformIDs(1, &platform, NULL);
if( error )
{
print_error( error, "Unable to get platform" );
test_finish();
return -1;
}
// Get our device
cl_uint num_devices = 0;
error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices );
if( error )
{
print_error( error, "Unable to get the number of devices" );
test_finish();
return -1;
}
std::vector<cl_device_id> devices(num_devices);
error = clGetDeviceIDs(platform, gDeviceType, num_devices, &devices[0], NULL );
if( error )
{
print_error( error, "Unable to get specified device type" );
test_finish();
return -1;
}
int device_index = 0;
char* device_index_str = getenv("CL_DEVICE_INDEX");
if (device_index_str && ((device_index = atoi(device_index_str))) >= num_devices) {
log_error("CL_DEVICE_INDEX=%d is greater than the number of devices %d\n",device_index,num_devices);
test_finish();
return -1;
}
device = devices[device_index];
// Get the device type so we know if it is a GPU even if default is passed in.
error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(gDeviceType), &gDeviceType, NULL);
if( error )
{
print_error( error, "Unable to get device type" );
test_finish();
return -1;
}
if( printDeviceHeader( device ) != CL_SUCCESS )
{
test_finish();
return -1;
}
// Check for image support
if(checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) {
log_info("Device does not support images. Skipping test.\n");
test_finish();
return 0;
}
// Create a context to test with
context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error );
if( error != CL_SUCCESS )
{
print_error( error, "Unable to create testing context" );
test_finish();
return -1;
}
// Create a queue against the context
queue = clCreateCommandQueueWithProperties( context, device, 0, &error );
if( error != CL_SUCCESS )
{
print_error( error, "Unable to create testing command queue" );
test_finish();
return -1;
}
if( gTestSmallImages )
log_info( "Note: Using small test images\n" );
// On most platforms which support denorm, default is FTZ off. However,
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
// This creates issues in result verification. Since spec allows the implementation to either flush or
// not flush denorms to zero, an implementation may choose not to flush i.e. return denorm result whereas
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
// where reference is being computed to make sure we get non-flushed reference result. If implementation
// returns flushed result, we correctly take care of that in verification code.
FPU_mode_type oldMode;
DisableFTZ(&oldMode);
// Run the test now
int ret = 0;
if (testMethods & k1D)
{
if (testTypesToRun & kReadTests)
{
gtestTypesToRun = kReadTests;
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D );
}
if (testTypesToRun & kWriteTests)
{
gtestTypesToRun = kWriteTests;
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D );
}
if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps)
{
gtestTypesToRun = kReadWriteTests;
overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse);
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D );
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D );
recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse);
}
}
if (testMethods & k2D)
{
if (testTypesToRun & kReadTests)
{
gtestTypesToRun = kReadTests;
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D );
if (is_extension_available(device, "cl_khr_image2d_from_buffer"))
{
log_info("Testing read_image{f | i | ui} for 2D image from buffer\n");
// NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages, gTestRounding and gTestMipmaps must be false
if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false)
{
cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse;
gTestImage2DFromBuffer = true;
// disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0
gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D );
gTestImage2DFromBuffer = false;
gMemFlagsToUse = saved_gMemFlagsToUse;
}
}
}
if (testTypesToRun & kWriteTests)
{
gtestTypesToRun = kWriteTests;
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D );
if (is_extension_available(device, "cl_khr_image2d_from_buffer"))
{
log_info("Testing write_image{f | i | ui} for 2D image from buffer\n");
// NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages,gTestRounding and gTestMipmaps must be false
if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false)
{
bool saved_gEnablePitch = gEnablePitch;
cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse;
gEnablePitch = true;
// disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0
gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
gTestImage2DFromBuffer = true;
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D );
gTestImage2DFromBuffer = false;
gMemFlagsToUse = saved_gMemFlagsToUse;
gEnablePitch = saved_gEnablePitch;
}
}
}
if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps)
{
gtestTypesToRun = kReadWriteTests;
overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse);
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D );
if (is_extension_available(device, "cl_khr_image2d_from_buffer"))
{
log_info("Testing read_image{f | i | ui} for 2D image from buffer\n");
// NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages, gTestRounding and gTestMipmaps must be false
if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false)
{
cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse;
gTestImage2DFromBuffer = true;
// disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0
gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D );
gTestImage2DFromBuffer = false;
gMemFlagsToUse = saved_gMemFlagsToUse;
}
}
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D );
if (is_extension_available(device, "cl_khr_image2d_from_buffer"))
{
log_info("Testing write_image{f | i | ui} for 2D image from buffer\n");
// NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages,gTestRounding and gTestMipmaps must be false
if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false)
{
bool saved_gEnablePitch = gEnablePitch;
cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse;
gEnablePitch = true;
// disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0
gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
gTestImage2DFromBuffer = true;
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D );
gTestImage2DFromBuffer = false;
gMemFlagsToUse = saved_gMemFlagsToUse;
gEnablePitch = saved_gEnablePitch;
}
}
recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse);
}
}
if (testMethods & k3D)
{
if (testTypesToRun & kReadTests)
{
gtestTypesToRun = kReadTests;
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE3D );
}
if (testTypesToRun & kWriteTests)
{
gtestTypesToRun = kWriteTests;
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE3D );
}
if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps)
{
gtestTypesToRun = kReadWriteTests;
overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse);
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE3D );
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE3D );
recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse);
}
}
if (testMethods & k1DArray)
{
if (testTypesToRun & kReadTests)
{
gtestTypesToRun = kReadTests;
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY );
}
if (testTypesToRun & kWriteTests)
{
gtestTypesToRun = kWriteTests;
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY );
}
if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps)
{
gtestTypesToRun = kReadWriteTests;
overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse);
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY );
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY );
recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse);
}
}
if (testMethods & k2DArray)
{
if (testTypesToRun & kReadTests)
{
gtestTypesToRun = kReadTests;
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY );
}
if (testTypesToRun & kWriteTests)
{
gtestTypesToRun = kWriteTests;
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY );
}
if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps)
{
gtestTypesToRun = kReadWriteTests;
overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse);
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY );
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY );
recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse);
}
}
// Restore FP state before leaving
RestoreFPState(&oldMode);
error = clFinish(queue);
if (error)
print_error(error, "clFinish failed.");
clReleaseContext(context);
clReleaseCommandQueue(queue);
if (gTestFailure == 0) {
if (gTestCount > 1)
log_info("PASSED %d of %d tests.\n", gTestCount, gTestCount);
else
log_info("PASSED test.\n");
} else if (gTestFailure > 0) {
if (gTestCount > 1)
log_error("FAILED %d of %d tests.\n", gTestFailure, gTestCount);
else
log_error("FAILED test.\n");
}
// Clean up
test_finish();
if (gTestFailure > 0)
return gTestFailure;
return ret;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,466 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../testBase.h"
// Shared state defined in main.cpp: the test context and the command-line selected
// restrictions and switches.
extern cl_context context;
extern cl_filter_mode gFilterModeToUse;
extern cl_addressing_mode gAddressModeToUse;
extern int gTypesToTest;
extern int gNormalizedModeToUse;
extern cl_channel_type gChannelTypeToUse;
extern cl_channel_order gChannelOrderToUse;
extern bool gDebugTrace;
extern bool gTestMipmaps;
extern int gtestTypesToRun;
// Per-image-type read test entry points, defined in other source files of this test.
extern int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
bool floatCoords, ExplicitType outputType );
extern int test_read_image_set_2D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
bool floatCoords, ExplicitType outputType );
extern int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
bool floatCoords, ExplicitType outputType );
extern int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
bool floatCoords, ExplicitType outputType );
extern int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
bool floatCoords, ExplicitType outputType );
// Display names for the image object types handled by this test.
static const char *str_1d_image = "1D";
static const char *str_2d_image = "2D";
static const char *str_3d_image = "3D";
static const char *str_1d_image_array = "1D array";
static const char *str_2d_image_array = "2D array";

/** Maps an image object type to its display name.
 *
 *  @param imageType One of the CL_MEM_OBJECT_IMAGE* values.
 *  @return The matching name ("1D", "2D", "3D", "1D array", "2D array"), or "unknown"
 *          for any other value. The returned string must not be freed.
 */
static const char *convert_image_type_to_string(cl_mem_object_type imageType)
{
    switch (imageType)
    {
        case CL_MEM_OBJECT_IMAGE1D:
            return str_1d_image;
        case CL_MEM_OBJECT_IMAGE2D:
            return str_2d_image;
        case CL_MEM_OBJECT_IMAGE3D:
            return str_3d_image;
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
            return str_1d_image_array;
        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
            return str_2d_image_array;
        default:
            // Never hand back an uninitialized pointer for a type not listed above
            return "unknown";
    }
}
/** Marks which image formats should be skipped and counts the ones that remain.
 *
 *  @param formatList               Formats to examine.
 *  @param filterFlags              One flag per format; set to true when the format is
 *                                  filtered out, false otherwise. Previous values are discarded.
 *  @param formatCount              Number of entries in formatList and filterFlags.
 *  @param channelDataTypesToFilter Optional list of accepted channel data types, terminated
 *                                  by (cl_channel_type)-1. When NULL, every data type is accepted.
 *  @return Number of formats left unfiltered.
 */
int filter_formats( cl_image_format *formatList, bool *filterFlags, unsigned int formatCount, cl_channel_type *channelDataTypesToFilter )
{
    const cl_channel_type noTypeSelected = (cl_channel_type)-1;
    const cl_channel_order noOrderSelected = (cl_channel_order)-1;
    int keptCount = 0;

    for( unsigned int idx = 0; idx < formatCount; ++idx )
    {
        const cl_channel_order order = formatList[ idx ].image_channel_order;
        const cl_channel_type dataType = formatList[ idx ].image_channel_data_type;

        // Every format starts out unfiltered, whatever an earlier pass decided
        filterFlags[ idx ] = false;

        // skip mipmap tests for CL_DEPTH formats (re# Khronos Bug 13762)
        if( gTestMipmaps && ( order == CL_DEPTH ) )
        {
            log_info("Skip mipmap tests for CL_DEPTH format\n");
            filterFlags[ idx ] = true;
            continue;
        }

        // Rejected by a command-line channel type / channel order restriction, or not a
        // standard channel order and type given by the spec (vendor extensions are not tested).
        const bool rejected =
            ( gChannelTypeToUse != noTypeSelected && gChannelTypeToUse != dataType ) ||
            ( gChannelOrderToUse != noOrderSelected && gChannelOrderToUse != order ) ||
            !IsChannelOrderSupported( order ) ||
            !IsChannelTypeSupported( dataType );
        if( rejected )
        {
            filterFlags[ idx ] = true;
            continue;
        }

        // Without an explicit list every remaining format is accepted; otherwise the
        // data type has to appear in the caller's list.
        bool accepted = ( channelDataTypesToFilter == NULL );
        for( int t = 0; !accepted && channelDataTypesToFilter[ t ] != noTypeSelected; ++t )
            accepted = ( dataType == channelDataTypesToFilter[ t ] );

        if( accepted )
            keptCount++;
        else
            filterFlags[ idx ] = true;
    }

    return keptCount;
}
/** Queries the image formats the test context supports for a given image type and flags.
 *
 *  @param device         Unused; the query is made against the global context.
 *  @param imageType      Image object type to query.
 *  @param outFormatList  Receives a new[]-allocated array of formats that the caller
 *                        releases with delete[]. Set to NULL if the list query fails.
 *  @param outFormatCount Receives the number of entries in outFormatList.
 *  @param flags          Memory flags the formats must support.
 *  @return 0 on success, otherwise the OpenCL error reported by the failing query.
 */
int get_format_list( cl_device_id device, cl_mem_object_type imageType, cl_image_format * &outFormatList, unsigned int &outFormatCount, cl_mem_flags flags )
{
    (void)device;

    // First call only asks for the count; no scratch list is needed for that.
    int error = clGetSupportedImageFormats( context, flags,
                                            imageType, 0, NULL, &outFormatCount );
    test_error( error, "Unable to get count of supported image formats" );

    outFormatList = new cl_image_format[ outFormatCount ];

    // Querying with zero entries and a non-NULL list is rejected by the API,
    // so an empty result is returned as an empty list.
    if( outFormatCount == 0 )
        return 0;

    error = clGetSupportedImageFormats( context, flags,
                                        imageType, outFormatCount, outFormatList, NULL );
    if( error != CL_SUCCESS )
    {
        // Do not leak the list when the second query fails
        delete [] outFormatList;
        outFormatList = NULL;
        outFormatCount = 0;
        test_error( error, "Unable to get list of supported image formats" );
        return error;
    }

    return 0;
}
// Runs the read test of the requested image type once per applicable
// addressing mode for a single format/sampler combination.
//
// The addressing mode of *imageSampler is overwritten for every iteration.
// Returns the bitwise OR of the per-mode test results (0 when everything passed
// or nothing was run).
int test_read_image_type( cl_device_id device, cl_image_format *format, bool floatCoords,
                          image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
{
    const cl_addressing_mode kEndOfModes = (cl_addressing_mode)-1;

    // The sampler-less read image functions behave like the sampler variants taking
    // integer coordinates with CLK_FILTER_NEAREST, CLK_NORMALIZED_COORDS_FALSE and
    // CLK_ADDRESS_NONE, so read_write runs only exercise CL_ADDRESS_NONE.
    cl_addressing_mode samplerlessModes[] = { CL_ADDRESS_NONE, (cl_addressing_mode)-1 };
    cl_addressing_mode samplerModes[] = { CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT, (cl_addressing_mode)-1 };

    cl_addressing_mode *modeList = ( gtestTypesToRun & kReadWriteTests ) ? samplerlessModes : samplerModes;

#if defined( __APPLE__ )
    // Linear filtering precision on the GPU is not guaranteed by the OpenCL
    // specification, so the CL_RGB / CL_UNORM_INT_101010 combination is skipped
    // for linear filtering on GPU devices.
    if ((gDeviceType == CL_DEVICE_TYPE_GPU) &&
        (imageSampler->filter_mode == CL_FILTER_LINEAR) &&
        (format->image_channel_order == CL_RGB) &&
        (format->image_channel_data_type == CL_UNORM_INT_101010))
    {
        log_info("--- Skipping CL_RGB CL_UNORM_INT_101010 format with CL_FILTER_LINEAR on GPU.\n");
        return 0;
    }
#endif

    int combined = 0;
    for( cl_addressing_mode *mode = modeList; *mode != kEndOfModes; ++mode )
    {
        imageSampler->addressing_mode = *mode;

        // Repeat modes only make sense with normalized coordinates
        const bool isRepeatMode = ( *mode == CL_ADDRESS_REPEAT ) || ( *mode == CL_ADDRESS_MIRRORED_REPEAT );
        if( isRepeatMode && !( imageSampler->normalized_coords ) )
            continue;

        // Honor a request to run only one specific addressing mode
        if( gAddressModeToUse != (cl_addressing_mode)-1 && imageSampler->addressing_mode != gAddressModeToUse )
            continue;

        print_read_header( format, imageSampler, false );
        gTestCount++;

        // Dispatch to the test matching the image type; unknown types run nothing
        int status = 0;
        if( imageType == CL_MEM_OBJECT_IMAGE1D )
            status = test_read_image_set_1D( device, format, imageSampler, floatCoords, outputType );
        else if( imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY )
            status = test_read_image_set_1D_array( device, format, imageSampler, floatCoords, outputType );
        else if( imageType == CL_MEM_OBJECT_IMAGE2D )
            status = test_read_image_set_2D( device, format, imageSampler, floatCoords, outputType );
        else if( imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY )
            status = test_read_image_set_2D_array( device, format, imageSampler, floatCoords, outputType );
        else if( imageType == CL_MEM_OBJECT_IMAGE3D )
            status = test_read_image_set_3D( device, format, imageSampler, floatCoords, outputType );

        if( status != 0 )
        {
            gTestFailure++;
            log_error( "FAILED: " );
            print_read_header( format, imageSampler, true );
            log_info( "\n" );
        }

        combined |= status;
    }

    return combined;
}
// Runs the read tests for every non-filtered format in formatList, iterating
// over the normalized / non-normalized and integer / float coordinate
// combinations that are valid for the sampler's filter mode.
//
// imageSampler->normalized_coords is overwritten while iterating.
// Returns the bitwise OR of all per-format results, or 0 when the sampler's
// filter mode is excluded by gFilterModeToUse.
int test_read_image_formats( cl_device_id device, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
                             image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
{
    // Honor a request to run only one specific filter mode
    if( gFilterModeToUse != (cl_filter_mode)-1 && imageSampler->filter_mode != gFilterModeToUse )
        return 0;

    int combined = 0;

    // Pass 0 is non-normalized, pass 1 is normalized
    for( int normPass = 0; normPass < 2; ++normPass )
    {
        const bool useNormalized = ( normPass == 1 );
        imageSampler->normalized_coords = useNormalized;

        if( gNormalizedModeToUse != 7 && gNormalizedModeToUse != (int)imageSampler->normalized_coords )
            continue;

        // Pass 0 uses integer coordinates, pass 1 uses float coordinates
        for( int coordPass = 0; coordPass < 2; ++coordPass )
        {
            const bool useFloatCoords = ( coordPass == 1 );

            if( useFloatCoords )
            {
                // Sampler-less reads in read_write tests run only with integer coords
                if( gtestTypesToRun & kReadWriteTests )
                    continue;
            }
            else
            {
                // Integer coords can only be used with nearest filtering, and
                // normalized integer coords make no sense (they'd all be zero)
                if( imageSampler->filter_mode != CL_FILTER_NEAREST || useNormalized )
                    continue;
            }

            const char *coordDescription = "integer";
            if( useFloatCoords )
                coordDescription = imageSampler->normalized_coords ? "normalized float" : "unnormalized float";

            log_info( "read_image (%s coords, %s results) *****************************\n",
                      coordDescription,
                      get_explicit_type_name( outputType ) );

            for( unsigned int fmt = 0; fmt < numFormats; ++fmt )
            {
                if( !filterFlags[ fmt ] )
                    combined |= test_read_image_type( device, &formatList[ fmt ], useFloatCoords, imageSampler, outputType, imageType );
            }
        }
    }

    return combined;
}
// Top-level driver for one image type: checks the required extensions and
// OpenCL version, fetches the supported format list, prints it the first time
// a given (image type, read/write) pair is seen, and then runs formatTestFn
// for the float, int and uint channel type groups selected by gTypesToTest.
//
//   device       - device under test
//   formatTestFn - test_read_image_formats or test_write_image_formats
//   imageType    - image object type to test
//
// Returns -1 when 3D image writes are requested but cl_khr_3d_image_writes is
// missing, or when the format list cannot be obtained; 0 when a mipmap test is
// skipped for lack of extension support; otherwise the sum of the values
// returned by formatTestFn.
int test_image_set( cl_device_id device, test_format_set_fn formatTestFn, cl_mem_object_type imageType )
{
    int ret = 0;
    static int printedFormatList = -1;

    if ( ( 0 == is_extension_available( device, "cl_khr_3d_image_writes" )) && (imageType == CL_MEM_OBJECT_IMAGE3D) && (formatTestFn == test_write_image_formats) )
    {
        gTestFailure++;
        log_error( "-----------------------------------------------------\n" );
        log_error( "FAILED: test writing CL_MEM_OBJECT_IMAGE3D images\n" );
        log_error( "This device does not support the mandated extension cl_khr_3d_image_writes.\n");
        log_error( "-----------------------------------------------------\n\n" );
        return -1;
    }

    if ( gTestMipmaps )
    {
        if ( 0 == is_extension_available( device, "cl_khr_mipmap_image" ))
        {
            log_info( "-----------------------------------------------------\n" );
            log_info( "This device does not support cl_khr_mipmap_image.\nSkipping mipmapped image test. \n" );
            log_info( "-----------------------------------------------------\n\n" );
            return 0;
        }
        if ( ( 0 == is_extension_available( device, "cl_khr_mipmap_image_writes" )) && (formatTestFn == test_write_image_formats))
        {
            log_info( "-----------------------------------------------------\n" );
            log_info( "This device does not support cl_khr_mipmap_image_writes.\nSkipping mipmapped image write test. \n" );
            log_info( "-----------------------------------------------------\n\n" );
            return 0;
        }
    }

    int version_check = check_opencl_version(device,1,2);
    if (version_check != 0) {
        // Each case reports only its own feature; the explicit breaks keep a
        // 1D image run from also reporting the array types should
        // test_missing_feature ever fall out of its case instead of returning.
        switch (imageType) {
            case CL_MEM_OBJECT_IMAGE1D:
                test_missing_feature(version_check, "image_1D");
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                test_missing_feature(version_check, "image_1D_array");
                break;
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                test_missing_feature(version_check, "image_2D_array");
                break;
            default:
                break;
        }
    }

    // Grab the list of supported image formats for integer reads
    cl_image_format *formatList;
    bool *filterFlags;
    unsigned int numFormats;

    // This flag is only for querying the list of supported formats
    // The flag for creating image will be set explicitly in test functions
    cl_mem_flags flags;
    const char *flagNames;
    if( formatTestFn == test_read_image_formats )
    {
        if(gtestTypesToRun & kReadTests)
        {
            flags = CL_MEM_READ_ONLY;
            flagNames = "read";
        }
        else
        {
            flags = CL_MEM_KERNEL_READ_AND_WRITE;
            flagNames = "read_write";
        }
    }
    else
    {
        if(gtestTypesToRun & kWriteTests)
        {
            flags = CL_MEM_WRITE_ONLY;
            flagNames = "write";
        }
        else
        {
            flags = CL_MEM_KERNEL_READ_AND_WRITE;
            flagNames = "read_write";
        }
    }

    if( get_format_list( device, imageType, formatList, numFormats, flags ) )
        return -1;
    BufferOwningPtr<cl_image_format> formatListBuf(formatList);

    filterFlags = new bool[ numFormats ];
    if( filterFlags == NULL )
    {
        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
        return -1;
    }
    BufferOwningPtr<bool> filterFlagsBuf(filterFlags);
    memset( filterFlags, 0, sizeof( bool ) * numFormats );

    // First time through, we'll go ahead and print the formats supported, regardless of type.
    // The key combines the image type with a read/write bit so each pair is printed once in a row.
    int test = imageType | (formatTestFn == test_read_image_formats ? (1 << 16) : (1 << 17));
    if( printedFormatList != test )
    {
        log_info( "---- Supported %s %s formats for this device ---- \n", convert_image_type_to_string(imageType), flagNames );
        for( unsigned int f = 0; f < numFormats; f++ )
        {
            if ( IsChannelOrderSupported( formatList[ f ].image_channel_order ) && IsChannelTypeSupported( formatList[ f ].image_channel_data_type ) )
                log_info( " %-7s %-24s %d\n", GetChannelOrderName( formatList[ f ].image_channel_order ),
                          GetChannelTypeName( formatList[ f ].image_channel_data_type ),
                          (int)get_format_channel_count( &formatList[ f ] ) );
        }
        log_info( "------------------------------------------- \n" );
        printedFormatList = test;
    }

    image_sampler_data imageSampler;

    /////// float tests ///////
    if( gTypesToTest & kTestFloat )
    {
        cl_channel_type floatFormats[] = { CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,
#ifdef OBSOLETE_FORAMT
            CL_UNORM_SHORT_565_REV, CL_UNORM_SHORT_555_REV, CL_UNORM_INT_8888, CL_UNORM_INT_8888_REV, CL_UNORM_INT_101010_REV,
#endif
#ifdef CL_SFIXED14_APPLE
            CL_SFIXED14_APPLE,
#endif
            CL_UNORM_INT8, CL_SNORM_INT8,
            CL_UNORM_INT16, CL_SNORM_INT16, CL_FLOAT, CL_HALF_FLOAT, (cl_channel_type)-1 };
        if( filter_formats( formatList, filterFlags, numFormats, floatFormats ) == 0 )
        {
            log_info( "No formats supported for float type\n" );
        }
        else
        {
            // Float results are tested with both filter modes
            imageSampler.filter_mode = CL_FILTER_NEAREST;
            ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kFloat, imageType );
            imageSampler.filter_mode = CL_FILTER_LINEAR;
            ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kFloat, imageType );
        }
    }

    /////// int tests ///////
    if( gTypesToTest & kTestInt )
    {
        cl_channel_type intFormats[] = { CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, (cl_channel_type)-1 };
        if( filter_formats( formatList, filterFlags, numFormats, intFormats ) == 0 )
        {
            log_info( "No formats supported for integer type\n" );
        }
        else
        {
            // Only filter mode we support on int is nearest
            imageSampler.filter_mode = CL_FILTER_NEAREST;
            ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kInt, imageType );
        }
    }

    /////// uint tests ///////
    if( gTypesToTest & kTestUInt )
    {
        cl_channel_type uintFormats[] = { CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, (cl_channel_type)-1 };
        if( filter_formats( formatList, filterFlags, numFormats, uintFormats ) == 0 )
        {
            log_info( "No formats supported for unsigned int type\n" );
        }
        else
        {
            // Only filter mode we support on uint is nearest
            imageSampler.filter_mode = CL_FILTER_NEAREST;
            ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kUInt, imageType );
        }
    }

    return ret;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,696 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../testBase.h"
#if !defined(_WIN32)
#include <sys/mman.h>
#endif
// Largest error accepted when validating CL_FLOAT image writes; it is compared
// against the per-channel error averaged over the channels of a pixel.
#define MAX_ERR 0.005f
// Shared test state and command-line options declared here as extern.
extern cl_command_queue queue;
extern cl_context context;
extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps;
extern cl_filter_mode gFilterModeToSkip;
extern cl_mem_flags gMemFlagsToUse;
extern int gtestTypesToRun;
// Kernel source template used when the 1D image is bound as a read_write image.
// sprintf placeholders, in order:
//   1. element type name of the input buffer (vectorized as <type>4)
//   2. optional extra kernel parameter (", int lod" for mipmap tests, else empty)
//   3. write_image suffix ("i", "ui" or "f")
//   4. optional extra write_image argument (", lod" for mipmap tests, else empty)
const char *readwrite1DKernelSourcePattern =
"__kernel void sample_kernel( __global %s4 *input, read_write image1d_t output %s)\n"
"{\n"
" int tidX = get_global_id(0);\n"
" int offset = tidX;\n"
" write_image%s( output, tidX %s, input[ offset ]);\n"
"}";
// Kernel source template used when the 1D image is bound as a write_only image.
// sprintf placeholders, in order:
//   1. element type name of the input buffer (vectorized as <type>4)
//   2. optional extra kernel parameter (", int lod" for mipmap tests, else empty)
//   3. write_image suffix ("i", "ui" or "f")
//   4. optional extra write_image argument (", lod" for mipmap tests, else empty)
const char *write1DKernelSourcePattern =
"__kernel void sample_kernel( __global %s4 *input, write_only image1d_t output %s)\n"
"{\n"
" int tidX = get_global_id(0);\n"
" int offset = tidX;\n"
" write_image%s( output, tidX %s, input[ offset ]);\n"
"}";
// Writes host-generated pixel data into a 1D image through `kernel`, reads the
// image back and validates every pixel against a host-side conversion of the
// same input.
//
// The test is repeated for each memory flag that applies to the current run
// (CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE for write tests, CL_MEM_READ_WRITE
// only otherwise) and, when gTestMipmaps is set, for each mip level.
//
//   device     - device under test (queried for its type on Apple platforms)
//   context    - context used to create the image and input buffer
//   queue      - command queue used to run the kernel and read back results
//   kernel     - kernel whose arg 0 is the input buffer, arg 1 the image and,
//                for mipmap tests, arg 2 the level of detail
//   imageInfo  - descriptor of the image to create (format, width, pitch, ...)
//   inputType  - host element type of the input data (kFloat, kInt or kUInt)
//   d          - random number generator state
//
// Returns 0 when every pixel validated. Returns the number of mismatching
// pixels when validation failed, 1 when validation was aborted early, or the
// OpenCL error code when an API call failed.
int test_write_image_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
                         image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
{
    // Accumulated over all memory flags and mip levels. It must not be
    // redeclared inside the loop below, or the failures counted there would
    // never reach the return statement.
    int totalErrors = 0;
    size_t num_flags = 0;
    const cl_mem_flags *mem_flag_types = NULL;
    const char * *mem_flag_names = NULL;
    const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
    const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
    const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
    const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};

    if(gtestTypesToRun & kWriteTests)
    {
        mem_flag_types = write_only_mem_flag_types;
        mem_flag_names = write_only_mem_flag_names;
        num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
    }
    else
    {
        mem_flag_types = read_write_mem_flag_types;
        mem_flag_names = read_write_mem_flag_names;
        num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
    }

    for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
    {
        int error;
        size_t threads[2];
        bool verifyRounding = false;
        int forceCorrectlyRoundedWrites = 0;

#if defined( __APPLE__ )
        // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
        cl_device_type type = 0;
        if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
        {
            log_error("Error: Could not get device type for Apple device! (%d) \n", error );
            return 1;
        }
        if( type == CL_DEVICE_TYPE_CPU )
            forceCorrectlyRoundedWrites = 1;
#endif

        if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
            if( DetectFloatToHalfRoundingMode(queue) )
                return 1;

        BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;

        create_random_image_data( inputType, imageInfo, imageValues, d );

        if(!gTestMipmaps)
        {
            if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
            {
                /* Pilot data for sRGB images */
                if(is_sRGBA_order(imageInfo->format->image_channel_order))
                {
                    // We want to generate ints (mostly) in range of the target format which should be [0,255]
                    // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
                    // it can test some out-of-range data points
                    const unsigned int test_range_ext = 16;
                    int formatMin = 0 - test_range_ext;
                    int formatMax = 255 + test_range_ext;
                    int pixel_value = 0;
                    float *inputValues = NULL;

                    // First, fill with arbitrary floats
                    {
                        inputValues = (float *)(char*)imageValues;
                        for( size_t i = 0; i < imageInfo->width * 4; i++ )
                        {
                            pixel_value = random_in_range( formatMin, (int)formatMax, d );
                            inputValues[ i ] = (float)(pixel_value/255.0f);
                        }
                    }

                    // Throw a few extra test values in there
                    inputValues = (float *)(char*)imageValues;
                    size_t i = 0;

                    // Piloting some debug inputs.
                    inputValues[ i++ ] = -0.5f;
                    inputValues[ i++ ] = 0.5f;
                    inputValues[ i++ ] = 2.f;
                    inputValues[ i++ ] = 0.5f;

                    // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
                    // is correct
                    if( imageInfo->width > 12 )
                    {
                        float formatMax = (float)get_format_max_int( imageInfo->format );
                        inputValues[ i++ ] = 4.0f / formatMax;
                        inputValues[ i++ ] = 4.3f / formatMax;
                        inputValues[ i++ ] = 4.5f / formatMax;
                        inputValues[ i++ ] = 4.7f / formatMax;
                        inputValues[ i++ ] = 5.0f / formatMax;
                        inputValues[ i++ ] = 5.3f / formatMax;
                        inputValues[ i++ ] = 5.5f / formatMax;
                        inputValues[ i++ ] = 5.7f / formatMax;
                    }
                }
                else
                {
                    // First, fill with arbitrary floats
                    {
                        float *inputValues = (float *)(char*)imageValues;
                        for( size_t i = 0; i < imageInfo->width * 4; i++ )
                            inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
                    }

                    // Throw a few extra test values in there
                    float *inputValues = (float *)(char*)imageValues;
                    size_t i = 0;
                    inputValues[ i++ ] = -0.0000000000009f;
                    inputValues[ i++ ] = 1.f;
                    inputValues[ i++ ] = -1.f;
                    inputValues[ i++ ] = 2.f;

                    // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
                    // is correct
                    if( imageInfo->width > 12 )
                    {
                        float formatMax = (float)get_format_max_int( imageInfo->format );
                        inputValues[ i++ ] = 4.0f / formatMax;
                        inputValues[ i++ ] = 4.3f / formatMax;
                        inputValues[ i++ ] = 4.5f / formatMax;
                        inputValues[ i++ ] = 4.7f / formatMax;
                        inputValues[ i++ ] = 5.0f / formatMax;
                        inputValues[ i++ ] = 5.3f / formatMax;
                        inputValues[ i++ ] = 5.5f / formatMax;
                        inputValues[ i++ ] = 5.7f / formatMax;
                        verifyRounding = true;
                    }
                }
            }
            else if( inputType == kUInt )
            {
                unsigned int *inputValues = (unsigned int*)(char*)imageValues;
                size_t i = 0;
                inputValues[ i++ ] = 0;
                inputValues[ i++ ] = 65535;
                inputValues[ i++ ] = 7271820;
                inputValues[ i++ ] = 0;
            }
        }

        // Construct testing sources
        clProtectedImage protImage;
        clMemWrapper unprotImage;
        cl_mem image;

        if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
        {
            // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
            // Do not use protected images for max image size test since it rounds the row size to a page size
            if (gTestMaxImages) {
                create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );

                unprotImage = create_image_1d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
                                               imageInfo->width, 0,
                                               maxImageUseHostPtrBackingStore, NULL, &error );
            } else {
                error = protImage.Create( context, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width );
            }
            if( error != CL_SUCCESS )
            {
                log_error( "ERROR: Unable to create 1D image of size %ld pitch %ld (%s, %s)\n", imageInfo->width,
                           imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
                return error;
            }

            if (gTestMaxImages)
                image = (cl_mem)unprotImage;
            else
                image = (cl_mem)protImage;
        }
        else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
        {
            // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
            // it works just as if no flag is specified, so we just do the same thing either way
            // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
            if( gTestMipmaps )
            {
                cl_image_desc image_desc = {0};
                image_desc.image_type = imageInfo->type;
                image_desc.num_mip_levels = imageInfo->num_mip_levels;
                image_desc.image_width = imageInfo->width;
                image_desc.image_array_size = imageInfo->arraySize;

                unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
                                             imageInfo->format, &image_desc, NULL, &error);
                if( error != CL_SUCCESS )
                {
                    log_error( "ERROR: Unable to create %d level 1D image of size %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width,
                               IGetErrorString( error ), mem_flag_names[mem_flag_index] );
                    return error;
                }
            }
            else
            {
                unprotImage = create_image_1d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
                                               imageInfo->width, 0,
                                               imageValues, NULL, &error );
                if( error != CL_SUCCESS )
                {
                    log_error( "ERROR: Unable to create 1D image of size %ld pitch %ld (%s, %s)\n", imageInfo->width,
                               imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
                    return error;
                }
            }
            image = unprotImage;
        }

        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
        test_error( error, "Unable to set kernel arguments" );

        size_t width_lod = imageInfo->width, nextLevelOffset = 0;
        size_t origin[ 3 ] = { 0, 0, 0 };
        size_t region[ 3 ] = { imageInfo->width, 1, 1 };
        size_t resultSize;

        // One pass for plain images, one pass per mip level for mipmapped images
        for( int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
        {
            if(gTestMipmaps)
            {
                error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
                test_error( error, "Unable to set kernel arguments" );
            }

            clMemWrapper inputStream;

            char *imagePtrOffset = imageValues + nextLevelOffset;
            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
                                          get_explicit_type_size( inputType ) * 4 * width_lod, imagePtrOffset, &error );
            test_error( error, "Unable to create input buffer" );

            // Set arguments
            error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
            test_error( error, "Unable to set kernel arguments" );

            // Run the kernel
            threads[0] = (size_t)width_lod;
            error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
            test_error( error, "Unable to run kernel" );

            // Get results
            if( gTestMipmaps )
                resultSize = width_lod * get_pixel_size( imageInfo->format );
            else
                resultSize = imageInfo->rowPitch;
            clProtectedArray PA(resultSize);
            char *resultValues = (char *)((void *)PA);

            if( gDebugTrace )
                log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );

            // The mip level to read is passed in origin[ 1 ]
            origin[ 1 ] = lod;
            region[ 0 ] = width_lod;
            error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, 0, resultValues, 0, NULL, NULL );
            test_error( error, "Unable to read results from kernel" );
            if( gDebugTrace )
                log_info( " results read\n" );

            // Validate results element by element
            char *imagePtr = imageValues + nextLevelOffset;
            // Number of mismatches that get logged before the test gives up
            int numTries = 5;
            {
                char *resultPtr = (char *)resultValues;
                for( size_t x = 0, i = 0; x < width_lod; x++, i++ )
                {
                    char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each

                    // Convert this pixel
                    if( inputType == kFloat )
                        pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
                    else if( inputType == kInt )
                        pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
                    else // if( inputType == kUInt )
                        pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );

                    // Compare against the results
                    if(is_sRGBA_order(imageInfo->format->image_channel_order))
                    {
                        // Compare sRGB-mapped values
                        cl_float expected[4] = {0};
                        cl_float* input_values = (float*)imagePtr;
                        cl_uchar *actual = (cl_uchar*)resultPtr;
                        float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
                        float err[4] = {0.0f};

                        for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
                        {
                            if(j < 3)
                            {
                                expected[j] = sRGBmap(input_values[j]);
                            }
                            else // there is no sRGB conversion for alpha component if it exists
                            {
                                expected[j] = NORMALIZE(input_values[j], 255.0f);
                            }

                            err[j] = fabsf( expected[ j ] - actual[ j ] );
                        }

                        if ((err[0] > max_err) ||
                            (err[1] > max_err) ||
                            (err[2] > max_err) ||
                            (err[3] > 0)) // there is no conversion for alpha so the error should be zero
                        {
                            log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
                            log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
                            log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
                            log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
                            return 1;
                        }
                    }
                    else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
                    {
                        // Compare floats
                        float *expected = (float *)resultBuffer;
                        float *actual = (float *)resultPtr;
                        float err = 0.f;
                        for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
                            err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );

                        err /= (float)get_format_channel_count( imageInfo->format );
                        if( err > MAX_ERR )
                        {
                            unsigned int *e = (unsigned int *)expected;
                            unsigned int *a = (unsigned int *)actual;
                            log_error( "ERROR: Sample %ld (%ld) did not validate! (%s)\n", i, x, mem_flag_names[mem_flag_index] );
                            log_error( " Error: %g\n", err );
                            log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
                            log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
                            log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
                            log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
                            totalErrors++;
                            if( ( --numTries ) == 0 )
                                return 1;
                        }
                    }
                    else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
                    {
                        // Compare half floats
                        if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
                        {
                            cl_ushort *e = (cl_ushort *)resultBuffer;
                            cl_ushort *a = (cl_ushort *)resultPtr;
                            int err_cnt = 0;

                            //Fix up cases where we have NaNs
                            for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
                            {
                                if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
                                    continue;
                                if( e[j] != a[j] )
                                    err_cnt++;
                            }

                            if( err_cnt )
                            {
                                totalErrors++;
                                log_error( "ERROR: Sample %ld (%ld) did not validate! (%s)\n", i, x, mem_flag_names[mem_flag_index] );
                                log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
                                log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
                                if( inputType == kFloat )
                                {
                                    float *p = (float *)(char *)imagePtr;
                                    log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
                                    log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
                                }
                                if( ( --numTries ) == 0 )
                                    return 1;
                            }
                        }
                    }
                    else
                    {
                        // Exact result passes every time
                        if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
                        {
                            // result is inexact. Calculate error
                            int failure = 1;
                            float errors[4] = {NAN, NAN, NAN, NAN};
                            pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );

                            // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
                            if( 0 == forceCorrectlyRoundedWrites &&
                                (
                                    imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
                                    imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
                                    imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
                                    imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
                                    imageInfo->format->image_channel_data_type == CL_SNORM_INT16
                                ))
                            {
                                // Written as !( x > 0.6 ) so that NaN error slots count as passing
                                if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
                                    ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
                                    failure = 0;
                            }

                            if( failure )
                            {
                                totalErrors++;
                                // Is it our special rounding test?
                                if( verifyRounding && i >= 1 && i <= 2 )
                                {
                                    // Try to guess what the rounding mode of the device really is based on what it returned
                                    const char *deviceRounding = "unknown";
                                    unsigned int deviceResults[8];
                                    read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod);
                                    read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );

                                    if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
                                        deviceRounding = "truncate";
                                    else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
                                             deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
                                        deviceRounding = "round to nearest";
                                    else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
                                             deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
                                        deviceRounding = "round to even";

                                    log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
                                    log_error( " Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
                                               deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
                                    log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
                                    return 1;
                                }
                                log_error( "ERROR: Sample %d (%d) did not validate!\n", (int)i, (int)x );
                                switch(imageInfo->format->image_channel_data_type)
                                {
                                    case CL_UNORM_INT8:
                                    case CL_SNORM_INT8:
                                    case CL_UNSIGNED_INT8:
                                    case CL_SIGNED_INT8:
                                        log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
                                        log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
                                        log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                        break;
                                    case CL_UNORM_INT16:
                                    case CL_SNORM_INT16:
                                    case CL_UNSIGNED_INT16:
                                    case CL_SIGNED_INT16:
#ifdef CL_SFIXED14_APPLE
                                    case CL_SFIXED14_APPLE:
#endif
                                        log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
                                        log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                        log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                        break;
                                    case CL_HALF_FLOAT:
                                        log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
                                        log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                        log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                        break;
                                    case CL_UNSIGNED_INT32:
                                    case CL_SIGNED_INT32:
                                        log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
                                        log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
                                        break;
                                    case CL_FLOAT:
                                        log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
                                        log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
                                        log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                        break;
                                }

                                float *v = (float *)(char *)imagePtr;
                                log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
                                log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
                                log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );

                                if( ( --numTries ) == 0 )
                                    return 1;
                            }
                        }
                    }
                    imagePtr += get_explicit_type_size( inputType ) * 4;
                    resultPtr += get_pixel_size( imageInfo->format );
                }
            }
            {
                // Advance to the next mip level: each level is half as wide, but never narrower than 1
                nextLevelOffset += width_lod * get_pixel_size( imageInfo->format );
                width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
            }
        }
    }

    // All done!
    return totalErrors;
}
/*
 * Builds the 1D image write kernel for one image format / input type and runs
 * test_write_image_1D() over a set of image widths.
 *
 * The widths are chosen by the global test mode:
 *   - gTestSmallImages: every width from 1 to 12
 *   - gTestMaxImages:   the sizes reported by get_max_sizes()
 *   - gTestRounding:    a single width derived from the channel type's value range
 *   - otherwise:        NUM_IMAGE_ITERATIONS random widths that fit the device limits
 *
 * device    - device whose image / memory limits are queried
 * format    - image format under test
 * inputType - kInt, kUInt or (anything else) float data fed to the kernel
 * d         - random number generator state
 *
 * Returns 0 when every run passed, otherwise the first non-zero value returned
 * by test_write_image_1D().
 */
int test_write_image_1D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
{
    char programSrc[10240];
    const char *ptr;
    const char *readFormat;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *KernelSourcePattern = NULL;
    int error;

    // Get our operating parameters
    size_t maxWidth;
    cl_ulong maxAllocSize, memSize;
    size_t pixelSize;

    image_descriptor imageInfo = { 0x0 };
    imageInfo.format = format;
    imageInfo.slicePitch = imageInfo.arraySize = 0;
    imageInfo.height = imageInfo.depth = 1;
    imageInfo.type = CL_MEM_OBJECT_IMAGE1D;
    pixelSize = get_pixel_size( imageInfo.format );

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );

    // Never plan for more memory than the host can address
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }

    // Determine types: suffix of the write_image{i,ui,f} builtin
    if( inputType == kInt )
        readFormat = "i";
    else if( inputType == kUInt )
        readFormat = "ui";
    else // kFloat
        readFormat = "f";

    // Construct the source
    if(gtestTypesToRun & kWriteTests)
    {
        KernelSourcePattern = write1DKernelSourcePattern;
    }
    else
    {
        KernelSourcePattern = readwrite1DKernelSourcePattern;
    }

    sprintf( programSrc,
             KernelSourcePattern,
             get_explicit_type_name( inputType ),
             gTestMipmaps ? ", int lod" : "",
             readFormat,
             gTestMipmaps ? ", lod" :"" );

    ptr = programSrc;
    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
    test_error( error, "Unable to create testing kernel" );

    // Run tests
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            imageInfo.rowPitch = imageInfo.width * pixelSize;
            if(gTestMipmaps)
                imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);

            if( gDebugTrace )
                log_info( " at size %d\n", (int)imageInfo.width );

            int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
        size_t numbeOfSizes;
        size_t sizes[100][3];

        get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format, CL_TRUE);

        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
        {
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.rowPitch = imageInfo.width * pixelSize;
            if(gTestMipmaps)
                imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);

            log_info("Testing %d\n", (int)imageInfo.width);

            int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestRounding )
    {
        // Widen before shifting: "1 << 32" on a plain int is undefined behaviour,
        // which is what a 4-byte channel type would otherwise produce.
        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
        imageInfo.width = (size_t)( typeRange / 256 );
        imageInfo.rowPitch = imageInfo.width * pixelSize;

        int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
        if( retCode )
            return retCode;
    }
    else
    {
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );

                if( gTestMipmaps)
                {
                    imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);
                    size = (cl_ulong) compute_mipmapped_image_size(imageInfo) * 4;
                }
                else
                {
                    imageInfo.rowPitch = imageInfo.width * pixelSize;
                    if( gEnablePitch )
                    {
                        // Pad each row by a random number of extra pixels
                        size_t extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.rowPitch += extraWidth * pixelSize;
                    }

                    size = (size_t)imageInfo.rowPitch * 4;
                }
            } while( size > maxAllocSize || ( size * 3 ) > memSize );

            if( gDebugTrace )
            {
                log_info( " at size %d (pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth );
                if( gTestMipmaps )
                    log_info( " and %d mip levels\n", (int)imageInfo.num_mip_levels );
            }

            int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,723 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../testBase.h"
#if !defined(_WIN32)
#include <sys/mman.h>
#endif
#define MAX_ERR 0.005f
extern cl_command_queue queue;
extern cl_context context;
extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps;
extern cl_filter_mode gFilterModeToSkip;
extern cl_mem_flags gMemFlagsToUse;
extern int gtestTypesToRun;
// printf-style templates for the 1D image array write kernel. They are expanded
// with sprintf in test_write_image_1D_array_set(); the %s conversions are, in order:
//   1. the input element type name (the "4" suffix makes it a 4-component vector)
//   2. ", int lod" when mipmaps are tested, otherwise an empty string
//   3. the source line(s) that compute "offset" (one of the two snippets below)
//   4. the write_image suffix: "i", "ui" or "f"
//   5. ", lod" when mipmaps are tested, otherwise an empty string

// Variant that declares the output image read_write.
const char *readwrite1DArrayKernelSourcePattern =
"__kernel void sample_kernel( __global %s4 *input, read_write image1d_array_t output %s)\n"
"{\n"
" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
"%s"
" write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ]);\n"
"}";
// Variant that declares the output image write_only.
const char *write1DArrayKernelSourcePattern =
"__kernel void sample_kernel( __global %s4 *input, write_only image1d_array_t output %s)\n"
"{\n"
" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
"%s"
" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n"
"}";
// Offset computation without mipmaps: row-major index using the full image width.
const char *offset1DArraySource =
" int offset = tidY*get_image_width(output) + tidX;\n";
// Offset computation with mipmaps: the width is halved per lod and clamped to at least 1.
const char *offset1DArrayLodSource =
" int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
" int offset = tidY*width_lod + tidX;\n";
/*
 * Writes host-generated pixel data into a 1D image array through the test kernel
 * and verifies what is read back from the image.
 *
 * For every memory-flag variant selected by gtestTypesToRun the function
 *   1. generates random input data (plus a few hand-picked values when not mipmapped),
 *   2. creates the image (protected / USE_HOST_PTR or plain, optionally mipmapped),
 *   3. for each mip level (a single pass when gTestMipmaps is off) runs the kernel,
 *      reads the level back with clEnqueueReadImage and compares every pixel with
 *      the value obtained by packing the input on the host.
 *
 * device    - device under test (only queried on Apple platforms)
 * context   - context used to create the image and the input buffer
 * queue     - command queue used to run the kernel and read the image
 * kernel    - kernel built from one of the 1D array source patterns
 * imageInfo - width, array size, pitches, format and mip level count of the image
 * inputType - element type of the data handed to the kernel
 * d         - random number generator state
 *
 * Returns 0 on success. On failure returns the number of mismatching samples,
 * 1 when validation is aborted early, or the OpenCL error code when the image
 * could not be created.
 */
int test_write_image_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
                               image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
{
    // Accumulated over all memory-flag variants. The loop below must not declare
    // its own counter, otherwise mismatches are dropped from the return value.
    int totalErrors = 0;
    size_t num_flags = 0;
    const cl_mem_flags *mem_flag_types = NULL;
    const char * *mem_flag_names = NULL;
    const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
    const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
    const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
    const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};

    if(gtestTypesToRun & kWriteTests)
    {
        mem_flag_types = write_only_mem_flag_types;
        mem_flag_names = write_only_mem_flag_names;
        num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
    }
    else
    {
        mem_flag_types = read_write_mem_flag_types;
        mem_flag_names = read_write_mem_flag_names;
        num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
    }

    size_t pixelSize = get_pixel_size( imageInfo->format );

    for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
    {
        int error;
        size_t threads[2];
        bool verifyRounding = false;
        int forceCorrectlyRoundedWrites = 0;

#if defined( __APPLE__ )
        // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
        cl_device_type type = 0;
        if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
        {
            log_error("Error: Could not get device type for Apple device! (%d) \n", error );
            return 1;
        }
        if( type == CL_DEVICE_TYPE_CPU )
            forceCorrectlyRoundedWrites = 1;
#endif

        if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
            if( DetectFloatToHalfRoundingMode(queue) )
                return 1;

        BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;

        create_random_image_data( inputType, imageInfo, imageValues, d );

        if(!gTestMipmaps)
        {
            if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
            {
                /* Pilot data for sRGB images */
                if(is_sRGBA_order(imageInfo->format->image_channel_order))
                {
                    // We want to generate ints (mostly) in range of the target format which should be [0,255]
                    // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
                    // it can test some out-of-range data points
                    const unsigned int test_range_ext = 16;
                    int formatMin = 0 - test_range_ext;
                    int formatMax = 255 + test_range_ext;
                    int pixel_value = 0;

                    // First, fill with arbitrary floats
                    for( size_t y = 0; y < imageInfo->arraySize; y++ )
                    {
                        float *inputValues = (float *)(char*)imageValues + y * imageInfo->width * 4;
                        for( size_t i = 0; i < imageInfo->width * 4; i++ )
                        {
                            pixel_value = random_in_range( formatMin, (int)formatMax, d );
                            inputValues[ i ] = (float)(pixel_value/255.0f);
                        }
                    }

                    // Throw a few extra test values in there
                    float *inputValues = (float *)(char*)imageValues;
                    size_t i = 0;

                    // Piloting some debug inputs.
                    inputValues[ i++ ] = -0.5f;
                    inputValues[ i++ ] = 0.5f;
                    inputValues[ i++ ] = 2.f;
                    inputValues[ i++ ] = 0.5f;

                    // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
                    // is correct
                    if( imageInfo->width > 12 )
                    {
                        float formatMax = (float)get_format_max_int( imageInfo->format );
                        inputValues[ i++ ] = 4.0f / formatMax;
                        inputValues[ i++ ] = 4.3f / formatMax;
                        inputValues[ i++ ] = 4.5f / formatMax;
                        inputValues[ i++ ] = 4.7f / formatMax;
                        inputValues[ i++ ] = 5.0f / formatMax;
                        inputValues[ i++ ] = 5.3f / formatMax;
                        inputValues[ i++ ] = 5.5f / formatMax;
                        inputValues[ i++ ] = 5.7f / formatMax;
                    }
                }
                else
                {
                    // First, fill with arbitrary floats
                    for( size_t y = 0; y < imageInfo->arraySize; y++ )
                    {
                        float *inputValues = (float *)(char*)imageValues + y * imageInfo->width * 4;
                        for( size_t i = 0; i < imageInfo->width * 4; i++ )
                            inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
                    }

                    // Throw a few extra test values in there
                    float *inputValues = (float *)(char*)imageValues;
                    size_t i = 0;
                    inputValues[ i++ ] = -0.0000000000009f;
                    inputValues[ i++ ] = 1.f;
                    inputValues[ i++ ] = -1.f;
                    inputValues[ i++ ] = 2.f;

                    // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
                    // is correct
                    if( imageInfo->width > 12 )
                    {
                        float formatMax = (float)get_format_max_int( imageInfo->format );
                        inputValues[ i++ ] = 4.0f / formatMax;
                        inputValues[ i++ ] = 4.3f / formatMax;
                        inputValues[ i++ ] = 4.5f / formatMax;
                        inputValues[ i++ ] = 4.7f / formatMax;
                        inputValues[ i++ ] = 5.0f / formatMax;
                        inputValues[ i++ ] = 5.3f / formatMax;
                        inputValues[ i++ ] = 5.5f / formatMax;
                        inputValues[ i++ ] = 5.7f / formatMax;
                        verifyRounding = true;
                    }
                }
            }
            else if( inputType == kUInt )
            {
                unsigned int *inputValues = (unsigned int*)(char*)imageValues;
                size_t i = 0;
                inputValues[ i++ ] = 0;
                inputValues[ i++ ] = 65535;
                inputValues[ i++ ] = 7271820;
                inputValues[ i++ ] = 0;
            }
        }

        // Construct testing sources
        clProtectedImage protImage;
        clMemWrapper unprotImage;
        cl_mem image;

        if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
        {
            // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
            // Do not use protected images for max image size test since it rounds the row size to a page size
            if (gTestMaxImages) {
                create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );

                unprotImage = create_image_1d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
                                                     imageInfo->width, imageInfo->arraySize, 0, 0,
                                                     maxImageUseHostPtrBackingStore, &error );
            } else {
                error = protImage.Create( context, (cl_mem_object_type)CL_MEM_OBJECT_IMAGE1D_ARRAY, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, 1, 1, imageInfo->arraySize );
            }
            if( error != CL_SUCCESS )
            {
                log_error( "ERROR: Unable to create 1D image array of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->arraySize,
                           imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
                return error;
            }

            if (gTestMaxImages)
                image = (cl_mem)unprotImage;
            else
                image = (cl_mem)protImage;
        }
        else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
        {
            // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
            // it works just as if no flag is specified, so we just do the same thing either way
            // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
            if( gTestMipmaps )
            {
                cl_image_desc image_desc = {0};
                image_desc.image_type = imageInfo->type;
                image_desc.num_mip_levels = imageInfo->num_mip_levels;
                image_desc.image_width = imageInfo->width;
                image_desc.image_array_size = imageInfo->arraySize;

                unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
                                             imageInfo->format, &image_desc, NULL, &error);
                if( error != CL_SUCCESS )
                {
                    log_error( "ERROR: Unable to create %d level 1D image array of size %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->arraySize,
                               IGetErrorString( error ), mem_flag_names[mem_flag_index] );
                    return error;
                }
            }
            else
            {
                unprotImage = create_image_1d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
                                                     imageInfo->width, imageInfo->arraySize, 0, 0,
                                                     imageValues, &error );
                if( error != CL_SUCCESS )
                {
                    log_error( "ERROR: Unable to create 1D image array of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->arraySize,
                               imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
                    return error;
                }
            }
            image = unprotImage;
        }

        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
        test_error( error, "Unable to set kernel arguments" );

        // width_lod tracks the width of the current mip level; nextLevelOffset is the
        // byte offset into imageValues at which the current level's input begins.
        size_t width_lod = imageInfo->width, nextLevelOffset = 0;
        size_t origin[ 3 ] = { 0, 0, 0 };
        size_t region[ 3 ] = { imageInfo->width, imageInfo->arraySize, 1 };
        size_t resultSize;

        for( int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
        {
            if(gTestMipmaps)
            {
                error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
                test_error( error, "Unable to set kernel arguments" );
            }

            // Run the kernel
            threads[0] = (size_t)width_lod;
            threads[1] = (size_t)imageInfo->arraySize;

            clMemWrapper inputStream;

            char *imagePtrOffset = imageValues + nextLevelOffset;
            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
                                          get_explicit_type_size( inputType ) * 4 * width_lod * imageInfo->arraySize, imagePtrOffset, &error );
            test_error( error, "Unable to create input buffer" );

            // Set arguments
            error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
            test_error( error, "Unable to set kernel arguments" );

            error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
            test_error( error, "Unable to run kernel" );

            // Get results
            if( gTestMipmaps )
                resultSize = width_lod * get_pixel_size(imageInfo->format) * imageInfo->arraySize;
            else
                resultSize = imageInfo->rowPitch * imageInfo->arraySize;

            clProtectedArray PA(resultSize);
            char *resultValues = (char *)((void *)PA);

            if( gDebugTrace )
                log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );

            // The mip level to read is passed in origin[2]
            origin[2] = lod;
            region[0] = width_lod;
            error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region,
                                        gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
            test_error( error, "Unable to read results from kernel" );
            if( gDebugTrace )
                log_info( " results read\n" );

            // Validate results element by element
            char *imagePtr = imageValues + nextLevelOffset;
            int numTries = 5; // give up on this level after this many reported mismatches
            for( size_t y = 0, i = 0; y < imageInfo->arraySize; y++ )
            {
                char *resultPtr;
                if( gTestMipmaps )
                    resultPtr = (char *)resultValues + y * width_lod * pixelSize;
                else
                    resultPtr = (char*)resultValues + y * imageInfo->rowPitch;

                for( size_t x = 0; x < width_lod; x++, i++ )
                {
                    char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each

                    // Convert this pixel
                    if( inputType == kFloat )
                        pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
                    else if( inputType == kInt )
                        pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
                    else // if( inputType == kUInt )
                        pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );

                    // Compare against the results
                    if(is_sRGBA_order(imageInfo->format->image_channel_order))
                    {
                        // Compare sRGB-mapped values
                        cl_float expected[4] = {0};
                        cl_float* input_values = (float*)imagePtr;
                        cl_uchar *actual = (cl_uchar*)resultPtr;
                        float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
                        float err[4] = {0.0f};

                        for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
                        {
                            if(j < 3)
                            {
                                expected[j] = sRGBmap(input_values[j]);
                            }
                            else // there is no sRGB conversion for alpha component if it exists
                            {
                                expected[j] = NORMALIZE(input_values[j], 255.0f);
                            }

                            err[j] = fabsf( expected[ j ] - actual[ j ] );
                        }

                        if ((err[0] > max_err) ||
                            (err[1] > max_err) ||
                            (err[2] > max_err) ||
                            (err[3] > 0)) // there is no conversion for alpha so the error should be zero
                        {
                            log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
                            log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
                            log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
                            log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
                            return 1;
                        }
                    }
                    else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
                    {
                        // Compare floats
                        float *expected = (float *)resultBuffer;
                        float *actual = (float *)resultPtr;
                        float err = 0.f;
                        // Mean relative error over the channels (absolute where the expected value is 0)
                        for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
                            err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );

                        err /= (float)get_format_channel_count( imageInfo->format );
                        if( err > MAX_ERR )
                        {
                            unsigned int *e = (unsigned int *)expected;
                            unsigned int *a = (unsigned int *)actual;
                            log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
                            log_error( " Error: %g\n", err );
                            log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
                            log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
                            log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
                            log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
                            totalErrors++;
                            if( ( --numTries ) == 0 )
                                return 1;
                        }
                    }
                    else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
                    {
                        // Compare half floats
                        if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
                        {
                            cl_ushort *e = (cl_ushort *)resultBuffer;
                            cl_ushort *a = (cl_ushort *)resultPtr;
                            int err_cnt = 0;

                            //Fix up cases where we have NaNs
                            for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
                            {
                                if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
                                    continue;
                                if( e[j] != a[j] )
                                    err_cnt++;
                            }

                            if( err_cnt )
                            {
                                totalErrors++;
                                log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
                                log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
                                log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
                                if( inputType == kFloat )
                                {
                                    float *p = (float *)(char *)imagePtr;
                                    log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
                                    log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
                                }
                                if( ( --numTries ) == 0 )
                                    return 1;
                            }
                        }
                    }
                    else
                    {
                        // Exact result passes every time
                        if( memcmp( resultBuffer, resultPtr, pixelSize ) != 0 )
                        {
                            // result is inexact. Calculate error
                            int failure = 1;
                            float errors[4] = {NAN, NAN, NAN, NAN};
                            pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );

                            // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
                            if( 0 == forceCorrectlyRoundedWrites &&
                                (
                                    imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
                                    imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
                                    imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
                                    imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
                                    imageInfo->format->image_channel_data_type == CL_SNORM_INT16
                                ))
                            {
                                // Written as !(x > 0.6) so that a NAN error counts as within tolerance
                                if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
                                    ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
                                    failure = 0;
                            }

                            if( failure )
                            {
                                totalErrors++;
                                // Is it our special rounding test?
                                if( verifyRounding && i >= 1 && i <= 2 )
                                {
                                    // Try to guess what the rounding mode of the device really is based on what it returned
                                    const char *deviceRounding = "unknown";
                                    unsigned int deviceResults[8];
                                    read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
                                    read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );

                                    if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
                                        deviceRounding = "truncate";
                                    else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
                                             deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
                                        deviceRounding = "round to nearest";
                                    else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
                                             deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
                                        deviceRounding = "round to even";

                                    log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
                                    log_error( " Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
                                               deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
                                    log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
                                    return 1;
                                }
                                log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
                                switch(imageInfo->format->image_channel_data_type)
                                {
                                    case CL_UNORM_INT8:
                                    case CL_SNORM_INT8:
                                    case CL_UNSIGNED_INT8:
                                    case CL_SIGNED_INT8:
                                        log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
                                        log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
                                        log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                        break;
                                    case CL_UNORM_INT16:
                                    case CL_SNORM_INT16:
                                    case CL_UNSIGNED_INT16:
                                    case CL_SIGNED_INT16:
#ifdef CL_SFIXED14_APPLE
                                    case CL_SFIXED14_APPLE:
#endif
                                        log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
                                        log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                        log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                        break;
                                    case CL_HALF_FLOAT:
                                        log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
                                        log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                        log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                        break;
                                    case CL_UNSIGNED_INT32:
                                    case CL_SIGNED_INT32:
                                        log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
                                        log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
                                        break;
                                    case CL_FLOAT:
                                        log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
                                        log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
                                        log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                        break;
                                }

                                float *v = (float *)(char *)imagePtr;
                                log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
                                log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
                                log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );

                                if( ( --numTries ) == 0 )
                                    return 1;
                            }
                        }
                    }
                    imagePtr += get_explicit_type_size( inputType ) * 4;
                    resultPtr += pixelSize;
                }
            }
            {
                // Advance to the next mip level: skip this level's data and halve the width (min 1)
                nextLevelOffset += width_lod * imageInfo->arraySize * get_pixel_size(imageInfo->format);
                width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
            }
        }
    }

    // All done!
    return totalErrors;
}
/*
 * Builds the 1D image array write kernel for one image format / input type and
 * runs test_write_image_1D_array() over a set of image sizes.
 *
 * The sizes are chosen by the global test mode:
 *   - gTestSmallImages: widths 1..12 combined with array sizes 2..8
 *   - gTestMaxImages:   the sizes reported by get_max_sizes()
 *   - gTestRounding:    a single size derived from the channel type's value range
 *   - otherwise:        NUM_IMAGE_ITERATIONS random sizes that fit the device limits
 *
 * device    - device whose image / memory limits are queried
 * format    - image format under test
 * inputType - kInt, kUInt or (anything else) float data fed to the kernel
 * d         - random number generator state
 *
 * Returns 0 when every run passed, otherwise the first non-zero value returned
 * by test_write_image_1D_array().
 */
int test_write_image_1D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
{
    char programSrc[10240];
    const char *ptr;
    const char *readFormat;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *KernelSourcePattern = NULL;
    int error;

    // Get our operating parameters
    size_t maxWidth, maxArraySize;
    cl_ulong maxAllocSize, memSize;
    size_t pixelSize;

    image_descriptor imageInfo = { 0x0 };
    imageInfo.format = format;
    imageInfo.slicePitch = 0;
    imageInfo.height = imageInfo.depth = 1;
    imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
    pixelSize = get_pixel_size( imageInfo.format );

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );

    // Never plan for more memory than the host can address
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }

    // Determine types: suffix of the write_image{i,ui,f} builtin
    if( inputType == kInt )
        readFormat = "i";
    else if( inputType == kUInt )
        readFormat = "ui";
    else // kFloat
        readFormat = "f";

    if(gtestTypesToRun & kWriteTests)
    {
        KernelSourcePattern = write1DArrayKernelSourcePattern;
    }
    else
    {
        KernelSourcePattern = readwrite1DArrayKernelSourcePattern;
    }

    // Construct the source
    sprintf( programSrc,
             KernelSourcePattern,
             get_explicit_type_name( inputType ),
             gTestMipmaps ? ", int lod" : "",
             gTestMipmaps ? offset1DArrayLodSource : offset1DArraySource,
             readFormat,
             gTestMipmaps ? ", lod" :"" );

    ptr = programSrc;
    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
    test_error( error, "Unable to create testing kernel" );

    // Run tests
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            imageInfo.rowPitch = imageInfo.width * pixelSize;
            imageInfo.slicePitch = imageInfo.rowPitch;
            for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ )
            {
                if(gTestMipmaps)
                    imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);

                if( gDebugTrace )
                    log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize );

                int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
                if( retCode )
                    return retCode;
            }
        }
    }
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
        size_t numbeOfSizes;
        size_t sizes[100][3];

        get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format, CL_TRUE);

        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
        {
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.arraySize = sizes[ idx ][ 2 ];
            imageInfo.rowPitch = imageInfo.width * pixelSize;
            imageInfo.slicePitch = imageInfo.rowPitch;
            if(gTestMipmaps)
                imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);

            log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.arraySize);

            int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestRounding )
    {
        // Widen before shifting: "1 << 32" on a plain int is undefined behaviour
        // for 4-byte channel types, and an overflowed range could leave arraySize
        // at 0, which is used as a divisor just below.
        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
        imageInfo.arraySize = (size_t)( typeRange / 256 );
        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.arraySize );

        imageInfo.rowPitch = imageInfo.width * pixelSize;
        imageInfo.slicePitch = imageInfo.rowPitch;

        int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
        if( retCode )
            return retCode;
    }
    else
    {
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
                imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, d );

                if( gTestMipmaps)
                {
                    imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);
                    size = (cl_ulong) compute_mipmapped_image_size(imageInfo) * 4;
                }
                else
                {
                    imageInfo.rowPitch = imageInfo.width * pixelSize;
                    if( gEnablePitch )
                    {
                        // Pad each row by a random number of extra pixels
                        size_t extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.rowPitch += extraWidth * pixelSize;
                    }
                    imageInfo.slicePitch = imageInfo.rowPitch;

                    size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.arraySize * 4;
                }
            } while( size > maxAllocSize || ( size * 3 ) > memSize );

            if( gDebugTrace )
                log_info( " at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize );

            int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,771 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../testBase.h"
#if !defined(_WIN32)
#include <sys/mman.h>
#endif
#define MAX_ERR 0.005f
extern cl_command_queue queue;
extern cl_context context;
extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps;
extern cl_filter_mode gFilterModeToSkip;
extern cl_mem_flags gMemFlagsToUse;
extern int gtestTypesToRun;
extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z,
ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
// Utility function to clamp down image sizes for certain tests to avoid
// using too much memory.
// Returns maxDimSize / 32, limited to the range [16, 128].
static size_t reduceImageSizeRange(size_t maxDimSize) {
    const size_t lowerLimit = 16;
    const size_t upperLimit = 128;
    const size_t scaled = maxDimSize / 32;

    if (scaled > upperLimit)
        return upperLimit;
    return (scaled < lowerLimit) ? lowerLimit : scaled;
}
// Scales a maximum depth down for testing.
// Returns maxDepth / 32, limited to the range [8, 32].
static size_t reduceImageDepth(size_t maxDepth) {
    const size_t lowerLimit = 8;
    const size_t upperLimit = 32;
    const size_t scaled = maxDepth / 32;

    if (scaled > upperLimit)
        return upperLimit;
    return (scaled < lowerLimit) ? lowerLimit : scaled;
}
// printf-style templates for a kernel that writes input[ offset ] to the output
// image at (tidX, tidY, tidZ). Each template has seven %s conversions.
// NOTE(review): the code that fills the conversions is not visible here — confirm
// the argument order against the sprintf call that uses these patterns.

// Variant that declares the output image write_only.
const char *write2DArrayKernelSourcePattern =
"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n"
"{\n"
" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
"%s"
" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
"}";
// Variant that declares the output image read_write.
const char *readwrite2DArrayKernelSourcePattern =
"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n"
"{\n"
" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
"%s"
" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ] );\n"
"}";
// Kernel snippet computing "offset" as a row-major index from the full image width and height.
const char *offset2DArrayKernelSource =
" int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n";
// Kernel snippet computing "offset" for a mip level: width and height are shifted
// right by lod and clamped to at least 1 before the index is formed.
const char *offset2DArrayLodKernelSource =
" int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
" int height_lod = ( get_image_height(output) >> lod ) ? ( get_image_height(output) >> lod ) : 1;\n"
" int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n";
// Runs the supplied write kernel against one 2D image array configuration and
// checks, pixel by pixel, that what the host reads back matches the data the
// kernel was given.
//
// For each memory flag under test (WRITE_ONLY and READ_WRITE for write tests,
// READ_WRITE only otherwise) the function:
//   1. generates random input data and plants a few hand-picked values,
//   2. creates the image (optionally mipmapped or backed by a host pointer),
//   3. for every mip level, uploads the input as a buffer, runs the kernel,
//      reads the level back and compares it against a host-side packing of
//      the same input.
//
// Parameters:
//   device    - device queried for its type (Apple builds only).
//   context   - context used to create the image and the input buffer.
//   queue     - queue the kernel and the read-back are enqueued on.
//   kernel    - kernel whose args are: 0 = input buffer, 1 = image, 2 = lod
//               (arg 2 is only set when gTestMipmaps is on).
//   imageInfo - width/height/arraySize/pitches/format of the image to test.
//   inputType - element type of the input buffer (kFloat, kInt or kUInt).
//   d         - random number generator state.
//
// Returns 0 when every pixel validated. Otherwise returns a non-zero value:
// the number of mismatches that were logged, 1 when validation aborted early,
// or the OpenCL error code of a failed image creation.
int test_write_image_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
                               image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
{
    // Mismatch count accumulated over every memory flag and mip level. It must
    // not be re-declared inside the loops below: a shadowing copy would leave
    // this one at zero and the function would report success despite failures.
    int totalErrors = 0;

    size_t num_flags = 0;
    const cl_mem_flags *mem_flag_types = NULL;
    const char * *mem_flag_names = NULL;
    const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
    const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
    const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
    const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};

    if(gtestTypesToRun & kWriteTests)
    {
        mem_flag_types = write_only_mem_flag_types;
        mem_flag_names = write_only_mem_flag_names;
        num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
    }
    else
    {
        mem_flag_types = read_write_mem_flag_types;
        mem_flag_names = read_write_mem_flag_names;
        num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
    }

    size_t pixelSize = get_pixel_size( imageInfo->format );

    for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
    {
        int error;
        size_t threads[3];
        bool verifyRounding = false;
        int forceCorrectlyRoundedWrites = 0;

#if defined( __APPLE__ )
        // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
        cl_device_type type = 0;
        if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
        {
            log_error("Error: Could not get device type for Apple device! (%d) \n", error );
            return 1;
        }
        if( type == CL_DEVICE_TYPE_CPU )
            forceCorrectlyRoundedWrites = 1;
#endif

        if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
            if( DetectFloatToHalfRoundingMode(queue) )
                return 1;

        BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;

        create_random_image_data( inputType, imageInfo, imageValues, d );

        // The hand-picked values below are only planted for non-mipmapped runs.
        if(!gTestMipmaps)
        {
            if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
            {
                /* Pilot data for sRGB images */
                if(is_sRGBA_order(imageInfo->format->image_channel_order))
                {
                    // We want to generate ints (mostly) in range of the target format which should be [0,255]
                    // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
                    // it can test some out-of-range data points
                    const unsigned int test_range_ext = 16;
                    int formatMin = 0 - test_range_ext;
                    int formatMax = 255 + test_range_ext;
                    int pixel_value = 0;

                    // First, fill with arbitrary floats
                    for( size_t z = 0; z < imageInfo->arraySize; z++ )
                    {
                        for( size_t y = 0; y < imageInfo->height; y++ )
                        {
                            // The cast binds before the additions, so the offsets are counted in floats (4 per pixel).
                            float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
                            for( size_t i = 0; i < imageInfo->width * 4; i++ )
                            {
                                pixel_value = random_in_range( formatMin, (int)formatMax, d );
                                inputValues[ i ] = (float)(pixel_value/255.0f);
                            }
                        }
                    }

                    // Throw a few extra test values in there
                    float *inputValues = (float *)(char*)imageValues;
                    size_t i = 0;

                    // Piloting some debug inputs.
                    inputValues[ i++ ] = -0.5f;
                    inputValues[ i++ ] = 0.5f;
                    inputValues[ i++ ] = 2.f;
                    inputValues[ i++ ] = 0.5f;

                    // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
                    // is correct
                    if( imageInfo->width > 12 )
                    {
                        float formatMax = (float)get_format_max_int( imageInfo->format );
                        inputValues[ i++ ] = 4.0f / formatMax;
                        inputValues[ i++ ] = 4.3f / formatMax;
                        inputValues[ i++ ] = 4.5f / formatMax;
                        inputValues[ i++ ] = 4.7f / formatMax;
                        inputValues[ i++ ] = 5.0f / formatMax;
                        inputValues[ i++ ] = 5.3f / formatMax;
                        inputValues[ i++ ] = 5.5f / formatMax;
                        inputValues[ i++ ] = 5.7f / formatMax;
                    }
                }
                else
                {
                    // First, fill with arbitrary floats
                    for( size_t z = 0; z < imageInfo->arraySize; z++ )
                    {
                        for( size_t y = 0; y < imageInfo->height; y++ )
                        {
                            float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
                            for( size_t i = 0; i < imageInfo->width * 4; i++ )
                                inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
                        }
                    }

                    // Throw a few extra test values in there
                    float *inputValues = (float *)(char*)imageValues;
                    size_t i = 0;
                    inputValues[ i++ ] = -0.0000000000009f;
                    inputValues[ i++ ] = 1.f;
                    inputValues[ i++ ] = -1.f;
                    inputValues[ i++ ] = 2.f;

                    // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
                    // is correct
                    if( imageInfo->width > 12 )
                    {
                        float formatMax = (float)get_format_max_int( imageInfo->format );
                        inputValues[ i++ ] = 4.0f / formatMax;
                        inputValues[ i++ ] = 4.3f / formatMax;
                        inputValues[ i++ ] = 4.5f / formatMax;
                        inputValues[ i++ ] = 4.7f / formatMax;
                        inputValues[ i++ ] = 5.0f / formatMax;
                        inputValues[ i++ ] = 5.3f / formatMax;
                        inputValues[ i++ ] = 5.5f / formatMax;
                        inputValues[ i++ ] = 5.7f / formatMax;
                        verifyRounding = true;
                    }
                }
            }
            else if( inputType == kUInt )
            {
                unsigned int *inputValues = (unsigned int*)(char*)imageValues;
                size_t i = 0;
                inputValues[ i++ ] = 0;
                inputValues[ i++ ] = 65535;
                inputValues[ i++ ] = 7271820;
                inputValues[ i++ ] = 0;
            }
        }

        // Construct testing sources
        clProtectedImage protImage;
        clMemWrapper unprotImage;
        cl_mem image;

        if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
        {
            create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );

            unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
                                                 imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0,
                                                 maxImageUseHostPtrBackingStore, &error );
            if( error != CL_SUCCESS )
            {
                log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) );
                return error;
            }
            image = (cl_mem)unprotImage;
        }
        else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
        {
            // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
            // it works just as if no flag is specified, so we just do the same thing either way
            // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
            if( gTestMipmaps )
            {
                cl_image_desc image_desc = {0};
                image_desc.image_type = imageInfo->type;
                image_desc.num_mip_levels = imageInfo->num_mip_levels;
                image_desc.image_width = imageInfo->width;
                image_desc.image_height = imageInfo->height;
                image_desc.image_array_size = imageInfo->arraySize;

                unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
                                             imageInfo->format, &image_desc, NULL, &error);
                if( error != CL_SUCCESS )
                {
                    log_error( "ERROR: Unable to create %d level 2D image array of size %ld x %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, imageInfo->arraySize,
                               IGetErrorString( error ), mem_flag_names[mem_flag_index] );
                    return error;
                }
            }
            else
            {
                unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
                                                     imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, imageValues, &error );
                if( error != CL_SUCCESS )
                {
                    log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) );
                    return error;
                }
            }
            image = unprotImage;
        }

        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
        test_error( error, "Unable to set kernel arguments" );

        size_t width_lod = imageInfo->width, height_lod = imageInfo->height, nextLevelOffset = 0;
        size_t origin[ 4 ] = { 0, 0, 0, 0 };
        size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->arraySize };
        size_t resultSize;

        int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
        for( int lod = 0; lod < num_lod_loops; lod++)
        {
            if(gTestMipmaps)
            {
                error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
                // A failure here would otherwise be overwritten by the next call and go unnoticed.
                test_error( error, "Unable to set kernel arguments" );
            }

            // Run the kernel
            threads[0] = (size_t)width_lod;
            threads[1] = (size_t)height_lod;
            threads[2] = (size_t)imageInfo->arraySize;

            clMemWrapper inputStream;

            char *imagePtrOffset = imageValues + nextLevelOffset;
            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
                                          get_explicit_type_size( inputType ) * 4 * width_lod * height_lod * imageInfo->arraySize, imagePtrOffset, &error );
            test_error( error, "Unable to create input buffer" );

            // Set arguments
            error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
            test_error( error, "Unable to set kernel arguments" );

            error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
            test_error( error, "Unable to run kernel" );

            // Get results
            if( gTestMipmaps )
                resultSize = width_lod * height_lod *imageInfo->arraySize * pixelSize;
            else
                resultSize = imageInfo->slicePitch *imageInfo->arraySize;
            clProtectedArray PA(resultSize);
            char *resultValues = (char *)((void *)PA);

            if( gDebugTrace )
                log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );

            // The fourth origin component carries the mip level being read.
            origin[3] = lod;
            region[0] = width_lod;
            region[1] = height_lod;
            error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
            test_error( error, "Unable to read results from kernel" );
            if( gDebugTrace )
                log_info( " results read\n" );

            // Validate results element by element
            char *imagePtr = imageValues + nextLevelOffset;
            // Allow this many mismatches to be logged per mip level before giving up.
            int numTries = 5;
            for( size_t z = 0, i = 0; z < imageInfo->arraySize; z++ )
            {
                for( size_t y = 0; y < height_lod; y++ )
                {
                    char *resultPtr;
                    if( gTestMipmaps )
                        resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize;
                    else
                        resultPtr = (char*)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;

                    for( size_t x = 0; x < width_lod; x++, i++ )
                    {
                        char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each

                        // Convert this pixel
                        if( inputType == kFloat )
                            pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
                        else if( inputType == kInt )
                            pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
                        else // if( inputType == kUInt )
                            pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );

                        // Compare against the results
                        if(is_sRGBA_order(imageInfo->format->image_channel_order))
                        {
                            // Compare sRGB-mapped values
                            cl_float expected[4] = {0};
                            cl_float* input_values = (float*)imagePtr;
                            cl_uchar *actual = (cl_uchar*)resultPtr;
                            float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
                            float err[4] = {0.0f};

                            for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
                            {
                                if(j < 3)
                                {
                                    expected[j] = sRGBmap(input_values[j]);
                                }
                                else // there is no sRGB conversion for alpha component if it exists
                                {
                                    expected[j] = NORMALIZE(input_values[j], 255.0f);
                                }

                                err[j] = fabsf( expected[ j ] - actual[ j ] );
                            }

                            if ((err[0] > max_err) ||
                                (err[1] > max_err) ||
                                (err[2] > max_err) ||
                                (err[3] > 0)) // there is no conversion for alpha so the error should be zero
                            {
                                log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
                                log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
                                log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
                                log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
                                return 1;
                            }
                        }
                        else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
                        {
                            // Compare floats
                            float *expected = (float *)resultBuffer;
                            float *actual = (float *)resultPtr;
                            float err = 0.f;
                            // Relative error per channel (absolute when the expected value is 0), averaged over the channels.
                            for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
                                err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );

                            err /= (float)get_format_channel_count( imageInfo->format );
                            if( err > MAX_ERR )
                            {
                                unsigned int *e = (unsigned int *)expected;
                                unsigned int *a = (unsigned int *)actual;
                                log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
                                log_error( " Error: %g\n", err );
                                log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
                                log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
                                log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
                                log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
                                totalErrors++;
                                if( ( --numTries ) == 0 )
                                    return 1;
                            }
                        }
                        else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
                        {
                            // Compare half floats
                            if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
                            {
                                cl_ushort *e = (cl_ushort *)resultBuffer;
                                cl_ushort *a = (cl_ushort *)resultPtr;
                                int err_cnt = 0;

                                //Fix up cases where we have NaNs
                                for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
                                {
                                    if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
                                        continue;
                                    if( e[j] != a[j] )
                                        err_cnt++;
                                }

                                if( err_cnt )
                                {
                                    totalErrors++;
                                    log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
                                    unsigned short *e = (unsigned short *)resultBuffer;
                                    unsigned short *a = (unsigned short *)resultPtr;
                                    log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
                                    log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
                                    if( inputType == kFloat )
                                    {
                                        float *p = (float *)(char *)imagePtr;
                                        log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
                                        log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
                                    }
                                    if( ( --numTries ) == 0 )
                                        return 1;
                                }
                            }
                        }
                        else
                        {
                            // Exact result passes every time
                            if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
                            {
                                // result is inexact. Calculate error
                                int failure = 1;
                                float errors[4] = {NAN, NAN, NAN, NAN};
                                pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );

                                // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
                                if( 0 == forceCorrectlyRoundedWrites &&
                                    (
                                        imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
                                        imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
                                        imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
                                        imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
                                        imageInfo->format->image_channel_data_type == CL_SNORM_INT16
                                    ))
                                {
                                    // Written as !(x > 0.6) so that NaN entries do not count as failures.
                                    if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
                                        ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
                                        failure = 0;
                                }

                                if( failure )
                                {
                                    totalErrors++;

                                    // Is it our special rounding test?
                                    if( verifyRounding && i >= 1 && i <= 2 )
                                    {
                                        // Try to guess what the rounding mode of the device really is based on what it returned
                                        const char *deviceRounding = "unknown";
                                        unsigned int deviceResults[8];
                                        read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod);
                                        read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );

                                        if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
                                            deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
                                            deviceRounding = "truncate";
                                        else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
                                                 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
                                            deviceRounding = "round to nearest";
                                        else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
                                                 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
                                            deviceRounding = "round to even";

                                        log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
                                        log_error( " Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
                                                   deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
                                        log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
                                        return 1;
                                    }

                                    log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
                                    switch(imageInfo->format->image_channel_data_type)
                                    {
                                        case CL_UNORM_INT8:
                                        case CL_SNORM_INT8:
                                        case CL_UNSIGNED_INT8:
                                        case CL_SIGNED_INT8:
                                            log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
                                            log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
                                            log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                            break;
                                        case CL_UNORM_INT16:
                                        case CL_SNORM_INT16:
                                        case CL_UNSIGNED_INT16:
                                        case CL_SIGNED_INT16:
#ifdef CL_SFIXED14_APPLE
                                        case CL_SFIXED14_APPLE:
#endif
                                            log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
                                            log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                            log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                            break;
                                        case CL_HALF_FLOAT:
                                            log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
                                            log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                            log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                            break;
                                        case CL_UNSIGNED_INT32:
                                        case CL_SIGNED_INT32:
                                            log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
                                            log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
                                            break;
                                        case CL_FLOAT:
                                            log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
                                            log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
                                            log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                            break;
                                    }

                                    float *v = (float *)(char *)imagePtr;
                                    log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
                                    log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
                                    log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );

                                    if( ( --numTries ) == 0 )
                                        return 1;
                                }
                            }
                        }

                        // Advance to the next input pixel (1 element for CL_DEPTH, 4 otherwise) and the next result pixel.
                        imagePtr += get_explicit_type_size( inputType ) * (( imageInfo->format->image_channel_order == CL_DEPTH ) ? 1 : 4);
                        resultPtr += get_pixel_size( imageInfo->format );
                    }
                }
            }

            {
                // Step to the next mip level: halve each dimension, never going below 1.
                // NOTE(review): this offset is counted in image-format pixel bytes, while the
                // input buffer above is sized (and imagePtr advanced) in input-type bytes.
                // Confirm against create_random_image_data that the units are intended.
                nextLevelOffset += width_lod*height_lod*imageInfo->arraySize*pixelSize;
                width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
                height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
            }
        }
    }

    // All done!
    return totalErrors;
}
// Builds the 2D-image-array write kernel for one image format / input type and
// runs test_write_image_2D_array over a set of image sizes.
//
// The size set depends on the global test switches, checked in this order:
//   gTestSmallImages - every width 1..12, height 1..8, array size 2..6;
//   gTestMaxImages   - the sizes returned by get_max_sizes;
//   gTestRounding    - a single image sized from the channel type's value range;
//   otherwise        - NUM_IMAGE_ITERATIONS random sizes that fit the device's
//                      allocation and global memory limits.
//
// Parameters:
//   device    - device whose image limits are queried.
//   format    - image format to test.
//   inputType - element type of the kernel's input buffer (kInt, kUInt or kFloat).
//   d         - random number generator state.
//
// Returns 0 when every size passed, otherwise the first non-zero result of
// test_write_image_2D_array. Setup failures are handled by the test_error macro.
int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
{
    char programSrc[10240];
    const char *ptr;
    const char *readFormat;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *KernelSourcePattern = NULL;
    int error;

    // Get our operating parameters
    size_t maxWidth, maxHeight, maxArraySize;
    cl_ulong maxAllocSize, memSize;
    image_descriptor imageInfo = { 0x0 };

    imageInfo.format = format;
    imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    imageInfo.depth = 1;
    imageInfo.slicePitch = 0;

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    // The queries above are the 2D / array limits, so the message says so.
    test_error( error, "Unable to get max image 2D array size from device" );

    // Cap the memory budget at what a size_t can express.
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }

    // Determine types
    if( inputType == kInt )
        readFormat = "i";
    else if( inputType == kUInt )
        readFormat = "ui";
    else // kFloat
        readFormat = "f";

    if(gtestTypesToRun & kWriteTests)
    {
        KernelSourcePattern = write2DArrayKernelSourcePattern;
    }
    else
    {
        KernelSourcePattern = readwrite2DArrayKernelSourcePattern;
    }

    // Construct the source
    sprintf( programSrc,
             KernelSourcePattern,
             get_explicit_type_name( inputType ),
             (format->image_channel_order == CL_DEPTH) ? "" : "4",
             (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t",
             gTestMipmaps ? " , int lod" : "",
             gTestMipmaps ? offset2DArrayLodKernelSource : offset2DArrayKernelSource,
             readFormat,
             gTestMipmaps ? ", lod" : "" );

    ptr = programSrc;
    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
    test_error( error, "Unable to create testing kernel" );

    // Run tests
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
            {
                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
                for( imageInfo.arraySize = 2; imageInfo.arraySize < 7; imageInfo.arraySize++ )
                {
                    if( gTestMipmaps )
                        imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);

                    if( gDebugTrace )
                        log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );
                    int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
                    if( retCode )
                        return retCode;
                }
            }
        }
    }
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
        size_t numbeOfSizes;
        size_t sizes[100][3];

        get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format, CL_TRUE);

        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
        {
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.height = sizes[ idx ][ 1 ];
            imageInfo.arraySize = sizes[ idx ][ 2 ];
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;

            if( gTestMipmaps )
                imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);

            log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize);
            int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestRounding )
    {
        // Number of distinct values one channel can hold. The shift is done in
        // 64 bits: with a plain int literal a 4-byte channel type would shift by
        // 32, which is undefined for a 32-bit int.
        const cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
        imageInfo.height = (size_t)( typeRange / 256 );
        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
        imageInfo.arraySize = 2;

        imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
        int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
        if( retCode )
            return retCode;
    }
    else
    {
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
            int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
            int maxArraySizeRange = (int) reduceImageDepth(maxArraySize);

            cl_ulong size, buffSize;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
                imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, d );
                imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, d );
                imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, d );

                if(gTestMipmaps)
                {
                    imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1), d);
                    //Need to take into account the input buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc
                    size = 4 * compute_mipmapped_image_size(imageInfo);
                    buffSize = size * get_explicit_type_size( inputType );
                }
                else
                {
                    imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
                    imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
                    if( gEnablePitch )
                    {
                        // Pad each row, then each slice, by a random number of extra pixels / rows.
                        size_t extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
                        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
                        extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
                    }

                    // Image size and buffer size may differ due to different pixel size.
                    // See the clCreateBuffer call in test_write_image_2D_array.
                    size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4;
                    buffSize = (cl_ulong)imageInfo.width * (cl_ulong)imageInfo.height * imageInfo.arraySize * get_explicit_type_size(inputType) * 4;
                }
            } while( size > maxAllocSize || buffSize > maxAllocSize || ( size * 3 ) > memSize );

            if( gDebugTrace )
                log_info( " at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", imageInfo.width, imageInfo.height, imageInfo.arraySize,
                          imageInfo.rowPitch, imageInfo.slicePitch, maxWidth, maxHeight, maxArraySize );

            int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,768 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../testBase.h"
#if !defined(_WIN32)
#include <sys/mman.h>
#endif
// Error tolerance constant.
// NOTE(review): its use lies outside the visible part of this file; presumably it
// bounds the error of float comparisons during validation — confirm at the use site.
#define MAX_ERR 0.005f
// Command queue and context declared here and defined elsewhere.
extern cl_command_queue queue;
extern cl_context context;
// Global test switches, declared here and defined elsewhere.
extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps;
extern cl_filter_mode gFilterModeToSkip;
// Host-pointer memory flag setting consulted when the test image is created.
extern cl_mem_flags gMemFlagsToUse;
// Bit mask of test types; the kWriteTests bit selects the write_only variants.
extern int gtestTypesToRun;
// Declaration only; the definition is not part of this section.
extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z,
ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
// Picks a reduced image dimension so that tests do not use too much memory:
// draws a value with random_log_in_range between 8 and maxDimSize/32 and caps
// the result at 128.
static size_t reduceImageSizeRange(size_t maxDimSize, MTdata& seed) {
    const size_t upperBound = 128;
    const size_t picked = random_log_in_range(8, (int) maxDimSize/32, seed);
    return (picked > upperBound) ? upperBound : picked;
}
// Picks a reduced image depth so that tests do not use too much memory:
// draws a value with random_log_in_range between 8 and maxDimSize/32 and caps
// the result at 32.
static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) {
    const size_t upperBound = 32;
    const size_t picked = random_log_in_range(8, (int) maxDimSize/32, seed);
    return (picked > upperBound) ? upperBound : picked;
}
// printf-style template for the kernel that writes to a write_only image3d_t.
// It has six %s slots. NOTE(review): the call that fills them is outside this
// section — presumably the first slot takes khr3DWritesPragma (or an empty string),
// followed by the input type name, an optional lod parameter, an offset snippet,
// the write_image suffix and an optional lod argument. Confirm at the call site.
const char *write3DKernelSourcePattern =
"%s"
"__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output %s )\n"
"{\n"
" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
"%s"
" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
"}";
// Same template with the image declared read_write instead of write_only.
const char *readwrite3DKernelSourcePattern =
"%s"
"__kernel void sample_kernel( __global %s4 *input, read_write image3d_t output %s )\n"
"{\n"
" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
"%s"
" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
"}";
// Source line that enables the cl_khr_3d_image_writes extension in a kernel.
const char *khr3DWritesPragma =
"#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
// Offset snippet using the full image size: linear index of the work-item's
// pixel in the input buffer.
const char *offset3DSource=
" int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n";
// Offset snippet taking a lod: the width and height are shifted right by lod
// (with a floor of 1) before the linear index is computed.
const char *offset3DLodSource =
" int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
" int height_lod = ( get_image_height(output) >> lod ) ? ( get_image_height(output) >> lod ) : 1;\n"
" int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n";
int test_write_image_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
{
int totalErrors = 0;
size_t num_flags = 0;
const cl_mem_flags *mem_flag_types = NULL;
const char * *mem_flag_names = NULL;
const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
if(gtestTypesToRun & kWriteTests)
{
mem_flag_types = write_only_mem_flag_types;
mem_flag_names = write_only_mem_flag_names;
num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
}
else
{
mem_flag_types = read_write_mem_flag_types;
mem_flag_names = read_write_mem_flag_names;
num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
}
size_t pixelSize = get_pixel_size( imageInfo->format );
for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
{
int error;
size_t threads[3];
bool verifyRounding = false;
int totalErrors = 0;
int forceCorrectlyRoundedWrites = 0;
#if defined( __APPLE__ )
// Require Apple's CPU implementation to be correctly rounded, not just within 0.6
cl_device_type type = 0;
if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
{
log_error("Error: Could not get device type for Apple device! (%d) \n", error );
return 1;
}
if( type == CL_DEVICE_TYPE_CPU )
forceCorrectlyRoundedWrites = 1;
#endif
if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
if( DetectFloatToHalfRoundingMode(queue) )
return 1;
BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
create_random_image_data( inputType, imageInfo, imageValues, d );
if(!gTestMipmaps)
{
if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
{
/* Pilot data for sRGB images */
if(is_sRGBA_order(imageInfo->format->image_channel_order))
{
// We want to generate ints (mostly) in range of the target format which should be [0,255]
// However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
// it can test some out-of-range data points
const unsigned int test_range_ext = 16;
int formatMin = 0 - test_range_ext;
int formatMax = 255 + test_range_ext;
int pixel_value = 0;
// First, fill with arbitrary floats
for( size_t z = 0; z < imageInfo->depth; z++ )
{
for( size_t y = 0; y < imageInfo->height; y++ )
{
float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
for( size_t i = 0; i < imageInfo->width * 4; i++ )
{
pixel_value = random_in_range( formatMin, (int)formatMax, d );
inputValues[ i ] = (float)(pixel_value/255.0f);
}
}
}
// Throw a few extra test values in there
float *inputValues = (float *)(char*)imageValues;
size_t i = 0;
// Piloting some debug inputs.
inputValues[ i++ ] = -0.5f;
inputValues[ i++ ] = 0.5f;
inputValues[ i++ ] = 2.f;
inputValues[ i++ ] = 0.5f;
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
// is correct
if( imageInfo->width > 12 )
{
float formatMax = (float)get_format_max_int( imageInfo->format );
inputValues[ i++ ] = 4.0f / formatMax;
inputValues[ i++ ] = 4.3f / formatMax;
inputValues[ i++ ] = 4.5f / formatMax;
inputValues[ i++ ] = 4.7f / formatMax;
inputValues[ i++ ] = 5.0f / formatMax;
inputValues[ i++ ] = 5.3f / formatMax;
inputValues[ i++ ] = 5.5f / formatMax;
inputValues[ i++ ] = 5.7f / formatMax;
}
}
else
{
// First, fill with arbitrary floats
for( size_t z = 0; z < imageInfo->depth; z++ )
{
for( size_t y = 0; y < imageInfo->height; y++ )
{
float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
for( size_t i = 0; i < imageInfo->width * 4; i++ )
inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
}
}
// Throw a few extra test values in there
float *inputValues = (float *)(char*)imageValues;
size_t i = 0;
inputValues[ i++ ] = -0.0000000000009f;
inputValues[ i++ ] = 1.f;
inputValues[ i++ ] = -1.f;
inputValues[ i++ ] = 2.f;
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
// is correct
if( imageInfo->width > 12 )
{
float formatMax = (float)get_format_max_int( imageInfo->format );
inputValues[ i++ ] = 4.0f / formatMax;
inputValues[ i++ ] = 4.3f / formatMax;
inputValues[ i++ ] = 4.5f / formatMax;
inputValues[ i++ ] = 4.7f / formatMax;
inputValues[ i++ ] = 5.0f / formatMax;
inputValues[ i++ ] = 5.3f / formatMax;
inputValues[ i++ ] = 5.5f / formatMax;
inputValues[ i++ ] = 5.7f / formatMax;
verifyRounding = true;
}
}
}
else if( inputType == kUInt )
{
unsigned int *inputValues = (unsigned int*)(char*)imageValues;
size_t i = 0;
inputValues[ i++ ] = 0;
inputValues[ i++ ] = 65535;
inputValues[ i++ ] = 7271820;
inputValues[ i++ ] = 0;
}
}
// Construct testing sources
clProtectedImage protImage;
clMemWrapper unprotImage;
cl_mem image;
if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
{
create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0,
maxImageUseHostPtrBackingStore, &error );
if( error != CL_SUCCESS )
{
log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
return error;
}
image = (cl_mem)unprotImage;
}
else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
{
// Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
// it works just as if no flag is specified, so we just do the same thing either way
// Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
if(gTestMipmaps)
{
cl_image_desc image_desc = {0};
image_desc.image_type = imageInfo->type;
image_desc.num_mip_levels = imageInfo->num_mip_levels;
image_desc.image_width = imageInfo->width;
image_desc.image_height = imageInfo->height;
image_desc.image_depth = imageInfo->depth;
unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
imageInfo->format, &image_desc, NULL, &error);
if( error != CL_SUCCESS )
{
log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %ld x %ld *%ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, imageInfo->depth,
IGetErrorString( error ), mem_flag_names[mem_flag_index] );
return error;
}
}
else
{
unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, imageValues, &error );
if( error != CL_SUCCESS )
{
log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
return error;
}
}
image = unprotImage;
}
error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
test_error( error, "Unable to set kernel arguments" );
size_t width_lod = imageInfo->width;
size_t height_lod = imageInfo->height;
size_t depth_lod = imageInfo->depth;
size_t nextLevelOffset = 0;
size_t origin[ 4 ] = { 0, 0, 0, 0 };
size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth };
int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
for( int lod = 0; lod < num_lod_loops; lod++)
{
if(gTestMipmaps)
{
error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
}
// Run the kernel
threads[0] = (size_t)width_lod;
threads[1] = (size_t)height_lod;
threads[2] = (size_t)depth_lod;
clMemWrapper inputStream;
char *imagePtrOffset = imageValues + nextLevelOffset;
inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
get_explicit_type_size( inputType ) * 4 * width_lod * height_lod * depth_lod, imagePtrOffset, &error );
test_error( error, "Unable to create input buffer" );
// Set arguments
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
test_error( error, "Unable to set kernel arguments" );
error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Unable to run kernel" );
// Get results
size_t resultSize;
if(gTestMipmaps)
resultSize = width_lod * height_lod * depth_lod * pixelSize;
else
resultSize = imageInfo->slicePitch *imageInfo->depth;
clProtectedArray PA(resultSize);
char *resultValues = (char *)((void *)PA);
if( gDebugTrace )
log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
origin[3] = lod;
region[0] = width_lod;
region[1] = height_lod;
region[2] = depth_lod;
error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
test_error( error, "Unable to read results from kernel" );
if( gDebugTrace )
log_info( " results read\n" );
// Validate results element by element
char *imagePtr = (char*)imageValues + nextLevelOffset;
int numTries = 5;
for( size_t z = 0, i = 0; z < depth_lod; z++ )
{
for( size_t y = 0; y < height_lod; y++ )
{
char *resultPtr;
if( gTestMipmaps )
resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize;
else
resultPtr = (char *)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;
for( size_t x = 0; x < width_lod; x++, i++ )
{
char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
// Convert this pixel
if( inputType == kFloat )
pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
else if( inputType == kInt )
pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
else // if( inputType == kUInt )
pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
// Compare against the results
if(is_sRGBA_order(imageInfo->format->image_channel_order))
{
// Compare sRGB-mapped values
cl_float expected[4] = {0};
cl_float* input_values = (float*)imagePtr;
cl_uchar *actual = (cl_uchar*)resultPtr;
float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
float err[4] = {0.0f};
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
{
if(j < 3)
{
expected[j] = sRGBmap(input_values[j]);
}
else // there is no sRGB conversion for alpha component if it exists
{
expected[j] = NORMALIZE(input_values[j], 255.0f);
}
err[j] = fabsf( expected[ j ] - actual[ j ] );
}
if ((err[0] > max_err) ||
(err[1] > max_err) ||
(err[2] > max_err) ||
(err[3] > FLT_EPSILON)) // there is no conversion for alpha
{
log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
return 1;
}
}
else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
{
// Compare floats
float *expected = (float *)resultBuffer;
float *actual = (float *)resultPtr;
float err = 0.f;
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
err /= (float)get_format_channel_count( imageInfo->format );
if( err > MAX_ERR )
{
unsigned int *e = (unsigned int *)expected;
unsigned int *a = (unsigned int *)actual;
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
log_error( " Error: %g\n", err );
log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
totalErrors++;
if( ( --numTries ) == 0 )
return 1;
}
}
else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
{
// Compare half floats
if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
{
cl_ushort *e = (cl_ushort *)resultBuffer;
cl_ushort *a = (cl_ushort *)resultPtr;
int err_cnt = 0;
//Fix up cases where we have NaNs
for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
{
if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
continue;
if( e[j] != a[j] )
err_cnt++;
}
if( err_cnt )
{
totalErrors++;
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
unsigned short *e = (unsigned short *)resultBuffer;
unsigned short *a = (unsigned short *)resultPtr;
log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
if( inputType == kFloat )
{
float *p = (float *)(char *)imagePtr;
log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
}
if( ( --numTries ) == 0 )
return 1;
}
}
}
else
{
// Exact result passes every time
if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
{
// result is inexact. Calculate error
int failure = 1;
float errors[4] = {NAN, NAN, NAN, NAN};
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
// We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
if( 0 == forceCorrectlyRoundedWrites &&
(
imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
imageInfo->format->image_channel_data_type == CL_SNORM_INT16
))
{
if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
failure = 0;
}
if( failure )
{
totalErrors++;
// Is it our special rounding test?
if( verifyRounding && i >= 1 && i <= 2 )
{
// Try to guess what the rounding mode of the device really is based on what it returned
const char *deviceRounding = "unknown";
unsigned int deviceResults[8];
read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
deviceRounding = "truncate";
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
deviceRounding = "round to nearest";
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
deviceRounding = "round to even";
log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
log_error( " Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
return 1;
}
log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
switch(imageInfo->format->image_channel_data_type)
{
case CL_UNORM_INT8:
case CL_SNORM_INT8:
case CL_UNSIGNED_INT8:
case CL_SIGNED_INT8:
log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
case CL_UNORM_INT16:
case CL_SNORM_INT16:
case CL_UNSIGNED_INT16:
case CL_SIGNED_INT16:
#ifdef CL_SFIXED14_APPLE
case CL_SFIXED14_APPLE:
#endif
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
case CL_HALF_FLOAT:
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
case CL_UNSIGNED_INT32:
case CL_SIGNED_INT32:
log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
break;
case CL_FLOAT:
log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
}
float *v = (float *)(char *)imagePtr;
log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );
if( ( --numTries ) == 0 )
return 1;
}
}
}
imagePtr += get_explicit_type_size( inputType ) * 4;
resultPtr += get_pixel_size( imageInfo->format );
}
}
}
{
nextLevelOffset += width_lod * height_lod * depth_lod * pixelSize;
width_lod = ( width_lod >> 1 ) ? ( width_lod >> 1 ) : 1;
height_lod = ( height_lod >> 1 ) ? ( height_lod >> 1 ) : 1;
depth_lod = ( depth_lod >> 1 ) ? ( depth_lod >> 1 ) : 1;
}
}
}
// All done!
return totalErrors;
}
// Builds the 3D image write (or read_write) kernel for the given image format and
// input element type, then runs test_write_image_3D over a set of image sizes.
//
// The set of sizes depends on the global test mode:
//   - gTestSmallImages: every width in [1,12], height in [1,8] and depth in [2,6]
//   - gTestMaxImages:   the sizes reported by get_max_sizes() for this device/format
//   - gTestRounding:    a single image whose width*height spans the channel type's range
//   - otherwise:        NUM_IMAGE_ITERATIONS randomly sized images that fit within the
//                       device's max allocation size and global memory size
//
// Parameters:
//   device    - device whose image limits are queried
//   format    - image format under test
//   inputType - element type of the kernel's input buffer (kInt, kUInt, else float)
//   d         - random number generator state
//
// Returns 0 if every size passed, otherwise the first non-zero code returned by
// test_write_image_3D. Setup failures are reported through test_error.
int test_write_image_3D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
{
    char programSrc[10240];
    const char *ptr;
    const char *readFormat;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *KernelSourcePattern = NULL;
    int error;

    // Get our operating parameters
    size_t maxWidth, maxHeight, maxDepth;
    cl_ulong maxAllocSize, memSize;
    image_descriptor imageInfo = { 0x0 };

    imageInfo.format = format;
    imageInfo.type = CL_MEM_OBJECT_IMAGE3D;

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );

    // Never plan for more memory than the host can address
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }

    // Determine types
    if( inputType == kInt )
        readFormat = "i";
    else if( inputType == kUInt )
        readFormat = "ui";
    else // kFloat
        readFormat = "f";

    if(gtestTypesToRun & kWriteTests)
    {
        KernelSourcePattern = write3DKernelSourcePattern;
    }
    else
    {
        KernelSourcePattern = readwrite3DKernelSourcePattern;
    }

    // Construct the source
    sprintf( programSrc,
             KernelSourcePattern,
             gTestMipmaps ? "" : khr3DWritesPragma,
             get_explicit_type_name( inputType ),
             gTestMipmaps ? ", int lod" : "",
             gTestMipmaps ? offset3DLodSource : offset3DSource,
             readFormat,
             gTestMipmaps ? ", lod" : "" );

    ptr = programSrc;
    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
    test_error( error, "Unable to create testing kernel" );

    // Run tests
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
            {
                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
                for( imageInfo.depth = 2; imageInfo.depth < 7; imageInfo.depth++ )
                {
                    if (gTestMipmaps)
                        imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);

                    if( gDebugTrace )
                        log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );

                    int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
                    if( retCode )
                        return retCode;
                }
            }
        }
    }
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
        size_t numberOfSizes;
        size_t sizes[100][3];

        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format, CL_TRUE);

        for( size_t idx = 0; idx < numberOfSizes; idx++ )
        {
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.height = sizes[ idx ][ 1 ];
            imageInfo.depth = sizes[ idx ][ 2 ];
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;

            if (gTestMipmaps)
                imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);

            log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth);

            int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestRounding )
    {
        // Do the shift in 64 bits: with a plain int, a 4-byte channel type would shift
        // by 32, which is undefined behavior and in practice produced a zero height
        // followed by a division by zero below.
        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );

        imageInfo.height = (size_t)( typeRange / 256 );
        // Defensive: never divide by a zero height if the type range is below 256
        if( imageInfo.height == 0 )
            imageInfo.height = 1;
        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
        imageInfo.depth = 1;
        imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;

        int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
        if( retCode )
            return retCode;
    }
    else
    {
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;

            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
                imageInfo.width = reduceImageSizeRange(maxWidth, d );
                imageInfo.height = reduceImageSizeRange(maxHeight, d );
                imageInfo.depth = reduceImageDepth(maxDepth, d );

                if(gTestMipmaps)
                {
                    imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
                    // Need to take into account the input buffer size, otherwise we will end up with
                    // an input buffer that exceeds MaxAlloc. Computed in 64 bits to avoid wrap-around.
                    size = (cl_ulong)4 * compute_mipmapped_image_size(imageInfo) * get_explicit_type_size( inputType );
                }
                else
                {
                    imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
                    imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;

                    if( gEnablePitch )
                    {
                        // Pad each row, then pad each slice by a random number of extra rows
                        size_t extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
                        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;

                        extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
                    }

                    // Multiply in cl_ulong: a size_t product can wrap on 32-bit hosts, which
                    // would let an oversized image slip past the limits checked below.
                    size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4;
                }
            } while( size > maxAllocSize || ( size * 3 ) > memSize );

            // %ld expects long; cast explicitly since size_t is not long on every platform
            if( gDebugTrace )
                log_info( " at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", (long)imageInfo.width, (long)imageInfo.height, (long)imageInfo.depth,
                          (long)imageInfo.rowPitch, (long)imageInfo.slicePitch, (long)maxWidth, (long)maxHeight, (long)maxDepth );

            int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,887 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../testBase.h"
#if !defined(_WIN32)
#include <sys/mman.h>
#endif
// Error threshold constant for this file.
// NOTE(review): presumably the tolerance for float result comparisons - confirm against its use sites.
#define MAX_ERR 0.005f
// Command queue and context shared by the tests; defined in another translation unit.
extern cl_command_queue queue;
extern cl_context context;
// Global test-mode switches; declared here, defined elsewhere.
extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestImage2DFromBuffer, gTestMipmaps;
extern cl_filter_mode gFilterModeToSkip;
extern cl_mem_flags gMemFlagsToUse;
// Bit mask of test kinds to run; this file tests it against kWriteTests.
extern int gtestTypesToRun;
// Write-test entry points for the other image types; defined in other translation units.
extern int test_write_image_1D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
extern int test_write_image_3D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
extern int test_write_image_1D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
extern int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
// printf-style template for the OpenCL C source of the write_only 2D image kernel
// "sample_kernel". Each work-item writes input[ offset ] to pixel (tidX, tidY) of output.
// It takes seven %s substitutions, in order:
//   1-2. text forming the element type of the __global input pointer
//        (presumably a base type name plus a vector width - confirm at the call site)
//   3.   the image type of the write_only output argument
//   4.   optional extra kernel parameter text
//   5.   a code snippet that must declare the variable `offset`
//   6.   the suffix appended to write_image
//   7.   optional extra argument text placed after the coordinate
const char *writeKernelSourcePattern =
"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n"
"{\n"
" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
"%s"
" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n"
"}";
// printf-style template for the OpenCL C source of "sample_kernel" where the output image
// is declared read_write instead of write_only. Each work-item writes input[ offset ] to
// pixel (tidX, tidY) of output. It takes seven %s substitutions, in order:
//   1-2. text forming the element type of the __global input pointer
//        (presumably a base type name plus a vector width - confirm at the call site)
//   3.   the image type of the read_write output argument
//   4.   optional extra kernel parameter text
//   5.   a code snippet that must declare the variable `offset`
//   6.   the suffix appended to write_image
//   7.   optional extra argument text placed after the coordinate
const char *read_writeKernelSourcePattern =
"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n"
"{\n"
" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
"%s"
" write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ] );\n"
"}";
// OpenCL C snippet that declares `offset` as the row-major linear index of pixel
// (tidX, tidY), using the full width of the image `output`. It relies on tidX, tidY
// and output being declared by the surrounding kernel source.
const char *offset2DKernelSource =
" int offset = tidY*get_image_width(output) + tidX;\n";
// OpenCL C snippet that declares `offset` as the row-major linear index of pixel
// (tidX, tidY) within one mip level. The level width is the image width shifted right
// by `lod`, with a minimum of 1. It relies on tidX, tidY, output and lod being declared
// by the surrounding kernel source (lod presumably comes in as a kernel parameter - confirm).
const char *offset2DLodKernelSource =
" int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
" int offset = tidY * width_lod + tidX;\n";
int test_write_image( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
{
int totalErrors = 0;
size_t num_flags = 0;
const cl_mem_flags *mem_flag_types = NULL;
const char * *mem_flag_names = NULL;
const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
if(gtestTypesToRun & kWriteTests)
{
mem_flag_types = write_only_mem_flag_types;
mem_flag_names = write_only_mem_flag_names;
num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
}
else
{
mem_flag_types = read_write_mem_flag_types;
mem_flag_names = read_write_mem_flag_names;
num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
}
size_t pixelSize = get_pixel_size( imageInfo->format );
int channel_scale = (imageInfo->format->image_channel_order == CL_DEPTH) ? 1 : 4;
for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
{
int error;
size_t threads[2];
bool verifyRounding = false;
int totalErrors = 0;
int forceCorrectlyRoundedWrites = 0;
#if defined( __APPLE__ )
// Require Apple's CPU implementation to be correctly rounded, not just within 0.6
cl_device_type type = 0;
if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
{
log_error("Error: Could not get device type for Apple device! (%d) \n", error );
return 1;
}
if( type == CL_DEVICE_TYPE_CPU )
forceCorrectlyRoundedWrites = 1;
#endif
if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
if( DetectFloatToHalfRoundingMode(queue) )
return 1;
BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues, imageBufferValues;
create_random_image_data( inputType, imageInfo, imageValues, d, gTestImage2DFromBuffer );
if(!gTestMipmaps)
{
if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
{
/* Pilot data for sRGB images */
if(is_sRGBA_order(imageInfo->format->image_channel_order))
{
// We want to generate ints (mostly) in range of the target format which should be [0,255]
// However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
// it can test some out-of-range data points
const unsigned int test_range_ext = 16;
int formatMin = 0 - test_range_ext;
int formatMax = 255 + test_range_ext;
int pixel_value = 0;
// First, fill with arbitrary floats
for( size_t y = 0; y < imageInfo->height; y++ )
{
float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4;
for( size_t i = 0; i < imageInfo->width * 4; i++ )
{
pixel_value = random_in_range( formatMin, (int)formatMax, d );
inputValues[ i ] = (float)(pixel_value/255.0f);
}
}
// Throw a few extra test values in there
float *inputValues = (float *)(char*)imageValues;
size_t i = 0;
// Piloting some debug inputs.
inputValues[ i++ ] = -0.5f;
inputValues[ i++ ] = 0.5f;
inputValues[ i++ ] = 2.0f;
inputValues[ i++ ] = 0.5f;
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
// is correct
if( imageInfo->width > 12 )
{
float formatMax = (float)get_format_max_int( imageInfo->format );
inputValues[ i++ ] = 4.0f / formatMax;
inputValues[ i++ ] = 4.3f / formatMax;
inputValues[ i++ ] = 4.5f / formatMax;
inputValues[ i++ ] = 4.7f / formatMax;
inputValues[ i++ ] = 5.0f / formatMax;
inputValues[ i++ ] = 5.3f / formatMax;
inputValues[ i++ ] = 5.5f / formatMax;
inputValues[ i++ ] = 5.7f / formatMax;
}
}
else
{
// First, fill with arbitrary floats
for( size_t y = 0; y < imageInfo->height; y++ )
{
float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * channel_scale;
for( size_t i = 0; i < imageInfo->width * channel_scale; i++ )
inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
}
// Throw a few extra test values in there
float *inputValues = (float *)(char*)imageValues;
size_t i = 0;
inputValues[ i++ ] = -0.0000000000009f;
inputValues[ i++ ] = 1.f;
inputValues[ i++ ] = -1.f;
inputValues[ i++ ] = 2.f;
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
// is correct
if( imageInfo->width > 12 )
{
float formatMax = (float)get_format_max_int( imageInfo->format );
inputValues[ i++ ] = 4.0f / formatMax;
inputValues[ i++ ] = 4.3f / formatMax;
inputValues[ i++ ] = 4.5f / formatMax;
inputValues[ i++ ] = 4.7f / formatMax;
inputValues[ i++ ] = 5.0f / formatMax;
inputValues[ i++ ] = 5.3f / formatMax;
inputValues[ i++ ] = 5.5f / formatMax;
inputValues[ i++ ] = 5.7f / formatMax;
verifyRounding = true;
}
}
}
else if( inputType == kUInt )
{
unsigned int *inputValues = (unsigned int*)(char*)imageValues;
size_t i = 0;
inputValues[ i++ ] = 0;
inputValues[ i++ ] = 65535;
inputValues[ i++ ] = 7271820;
inputValues[ i++ ] = 0;
}
}
// Construct testing sources
clProtectedImage protImage;
clMemWrapper unprotImage;
cl_mem image;
cl_mem imageBuffer;
if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
{
if (gTestImage2DFromBuffer)
{
imageBuffer = clCreateBuffer( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR,
imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
test_error( error, "Unable to create buffer" );
unprotImage = create_image_2d_buffer( context, mem_flag_types[mem_flag_index], imageInfo->format,
imageInfo->width, imageInfo->height, imageInfo->rowPitch,
imageBuffer, &error );
}
else
{
// clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
// Do not use protected images for max image size test since it rounds the row size to a page size
if (gTestMaxImages) {
create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
imageInfo->width, imageInfo->height, 0,
maxImageUseHostPtrBackingStore, &error );
} else {
error = protImage.Create( context, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, imageInfo->height );
}
}
if( error != CL_SUCCESS )
{
if (gTestImage2DFromBuffer) {
clReleaseMemObject(imageBuffer);
if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
return 0;
}
}
log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height,
imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
return error;
}
if (gTestMaxImages || gTestImage2DFromBuffer)
image = (cl_mem)unprotImage;
else
image = (cl_mem)protImage;
}
else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
{
if( gTestMipmaps )
{
cl_image_desc image_desc = {0};
image_desc.image_type = imageInfo->type;
image_desc.num_mip_levels = imageInfo->num_mip_levels;
image_desc.image_width = imageInfo->width;
image_desc.image_height = imageInfo->height;
unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
imageInfo->format, &image_desc, NULL, &error);
if( error != CL_SUCCESS )
{
log_error( "ERROR: Unable to create %d level 2D image of size %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height,
IGetErrorString( error ), mem_flag_names[mem_flag_index] );
return error;
}
}
else if (gTestImage2DFromBuffer)
{
generate_random_image_data( imageInfo, imageBufferValues, d );
imageBuffer = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR,
imageInfo->rowPitch * imageInfo->height, imageBufferValues, &error);
test_error( error, "Unable to create buffer" );
unprotImage = create_image_2d_buffer( context, mem_flag_types[mem_flag_index], imageInfo->format,
imageInfo->width, imageInfo->height, imageInfo->rowPitch,
imageBuffer, &error );
}
else
{
// Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
// it works just as if no flag is specified, so we just do the same thing either way
// Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
imageInfo->width, imageInfo->height, 0,
imageValues, &error );
}
if( error != CL_SUCCESS )
{
if (gTestImage2DFromBuffer) {
clReleaseMemObject(imageBuffer);
if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
return 0;
}
}
log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height,
imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
return error;
}
image = unprotImage;
}
error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
test_error( error, "Unable to set kernel arguments" );
size_t width_lod = imageInfo->width, height_lod = imageInfo->height, nextLevelOffset = 0;
size_t origin[ 3 ] = { 0, 0, 0 };
size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
size_t resultSize;
int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
for( int lod = 0; lod < num_lod_loops; lod++)
{
if(gTestMipmaps)
{
error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
}
// Run the kernel
threads[0] = (size_t)width_lod;
threads[1] = (size_t)height_lod;
clMemWrapper inputStream;
char *imagePtrOffset = imageValues + nextLevelOffset;
inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
get_explicit_type_size( inputType ) * channel_scale * width_lod * height_lod, imagePtrOffset, &error );
test_error( error, "Unable to create input buffer" );
// Set arguments
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
test_error( error, "Unable to set kernel arguments" );
error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Unable to run kernel" );
// Get results
if( gTestMipmaps )
resultSize = width_lod * height_lod * get_pixel_size(imageInfo->format);
else
resultSize = imageInfo->rowPitch * imageInfo->height;
clProtectedArray PA(resultSize);
char *resultValues = (char *)((void *)PA);
if( gDebugTrace )
log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
origin[2] = lod;
region[0] = width_lod;
region[1] = height_lod;
error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, 0, resultValues, 0, NULL, NULL );
test_error( error, "Unable to read results from kernel" );
if( gDebugTrace )
log_info( " results read\n" );
// Validate results element by element
char *imagePtr = (char*)imageValues + nextLevelOffset;
int numTries = 5;
for( size_t y = 0, i = 0; y < height_lod; y++ )
{
char *resultPtr;
if( gTestMipmaps )
resultPtr = (char *)resultValues + y * width_lod * pixelSize;
else
resultPtr = (char*)resultValues + y * imageInfo->rowPitch;
for( size_t x = 0; x < width_lod; x++, i++ )
{
char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
// Convert this pixel
if( inputType == kFloat )
pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
else if( inputType == kInt )
pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
else // if( inputType == kUInt )
pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
// Compare against the results
if(is_sRGBA_order(imageInfo->format->image_channel_order))
{
// Compare sRGB-mapped values
cl_float expected[4] = {0};
cl_float* input_values = (float*)imagePtr;
cl_uchar *actual = (cl_uchar*)resultPtr;
float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
float err[4] = {0.0f};
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
{
if(j < 3)
{
expected[j] = sRGBmap(input_values[j]);
}
else // there is no sRGB conversion for alpha component if it exists
{
expected[j] = NORMALIZE(input_values[j], 255.0f);
}
err[j] = fabsf( expected[ j ] - actual[ j ] );
}
if ((err[0] > max_err) ||
(err[1] > max_err) ||
(err[2] > max_err) ||
(err[3] > 0)) // there is no conversion for alpha so the error should be zero
{
log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
return 1;
}
}
else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
{
// Compare floats
float *expected = (float *)resultBuffer;
float *actual = (float *)resultPtr;
float err = 0.f;
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
err /= (float)get_format_channel_count( imageInfo->format );
if( err > MAX_ERR )
{
unsigned int *e = (unsigned int *)expected;
unsigned int *a = (unsigned int *)actual;
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
log_error( " Error: %g\n", err );
log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
totalErrors++;
if( ( --numTries ) == 0 )
return 1;
}
}
else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
{
// Compare half floats
if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
{
cl_ushort *e = (cl_ushort *)resultBuffer;
cl_ushort *a = (cl_ushort *)resultPtr;
int err_cnt = 0;
//Fix up cases where we have NaNs
for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
{
if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
continue;
if( e[j] != a[j] )
err_cnt++;
}
if( err_cnt )
{
totalErrors++;
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
if( inputType == kFloat )
{
float *p = (float *)(char *)imagePtr;
log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
}
if( ( --numTries ) == 0 )
return 1;
}
}
}
else
{
// Exact result passes every time
if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
{
// result is inexact. Calculate error
int failure = 1;
float errors[4] = {NAN, NAN, NAN, NAN};
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
// We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
if( 0 == forceCorrectlyRoundedWrites &&
(
imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
imageInfo->format->image_channel_data_type == CL_SNORM_INT16
))
{
if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
failure = 0;
}
if( failure )
{
totalErrors++;
// Is it our special rounding test?
if( verifyRounding && i >= 1 && i <= 2 )
{
// Try to guess what the rounding mode of the device really is based on what it returned
const char *deviceRounding = "unknown";
unsigned int deviceResults[8];
read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
deviceRounding = "truncate";
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
deviceRounding = "round to nearest";
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
deviceRounding = "round to even";
log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
log_error( " Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
return 1;
}
log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
switch(imageInfo->format->image_channel_data_type)
{
case CL_UNORM_INT8:
case CL_SNORM_INT8:
case CL_UNSIGNED_INT8:
case CL_SIGNED_INT8:
case CL_UNORM_INT_101010:
log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
case CL_UNORM_INT16:
case CL_SNORM_INT16:
case CL_UNSIGNED_INT16:
case CL_SIGNED_INT16:
#ifdef CL_SFIXED14_APPLE
case CL_SFIXED14_APPLE:
#endif
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
case CL_HALF_FLOAT:
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
case CL_UNSIGNED_INT32:
case CL_SIGNED_INT32:
log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
break;
case CL_FLOAT:
log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
}
float *v = (float *)(char *)imagePtr;
log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );
if( ( --numTries ) == 0 )
return 1;
}
}
}
imagePtr += get_explicit_type_size( inputType ) * channel_scale;
resultPtr += get_pixel_size( imageInfo->format );
}
}
{
nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format);
width_lod = (width_lod >> 1) ?(width_lod >> 1) : 1;
height_lod = (height_lod >> 1) ?(height_lod >> 1) : 1;
}
}
if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
}
// All done!
return totalErrors;
}
// Builds the 2D image write kernel (or the read_write kernel, depending on
// gtestTypesToRun) for one image format / input type pair and runs
// test_write_image over a series of image sizes selected by the global
// test-mode flags (small sizes, maximum sizes, the rounding size, or random sizes).
//
// Parameters:
//   device    - device queried for its 2D image and memory limits
//   format    - image format under test
//   inputType - element type of the kernel input (kInt, kUInt, otherwise float)
//   d         - random number generator state used to pick sizes / mip levels
//
// Returns 0 when every size passed or the format was skipped; otherwise the
// first non-zero result of test_write_image. A failed device query or kernel
// build leaves the function early through test_error.
int test_write_image_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
{
    char programSrc[10240];
    const char *ptr;
    const char *readFormat;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *KernelSourcePattern = NULL;
    int error;

    // 2D-image-from-buffer runs skip every RGB / RGBx format with one of the listed channel types
    if (gTestImage2DFromBuffer)
    {
        if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
        {
            switch (format->image_channel_data_type)
            {
                case CL_UNORM_INT8:
                case CL_UNORM_INT16:
                case CL_SNORM_INT8:
                case CL_SNORM_INT16:
                case CL_HALF_FLOAT:
                case CL_FLOAT:
                case CL_SIGNED_INT8:
                case CL_SIGNED_INT16:
                case CL_SIGNED_INT32:
                case CL_UNSIGNED_INT8:
                case CL_UNSIGNED_INT16:
                case CL_UNSIGNED_INT32:
                    log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
                              GetChannelTypeName( format->image_channel_data_type ));
                    return 0;
                default:
                    break;
            }
        }
    }

    // Get our operating parameters
    size_t maxWidth, maxHeight;
    cl_ulong maxAllocSize, memSize;
    image_descriptor imageInfo = { 0x0 };

    imageInfo.format = format;
    imageInfo.slicePitch = imageInfo.arraySize = imageInfo.depth = 0;
    imageInfo.type = CL_MEM_OBJECT_IMAGE2D;

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );

    // Clamp the reported global memory size to what a size_t can represent
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }

    // Determine types
    if( inputType == kInt )
        readFormat = "i";
    else if( inputType == kUInt )
        readFormat = "ui";
    else // kFloat
        readFormat = "f";

    if(gtestTypesToRun & kWriteTests)
    {
        KernelSourcePattern = writeKernelSourcePattern;
    }
    else
    {
        KernelSourcePattern = read_writeKernelSourcePattern;
    }

    // Construct the source
    sprintf( programSrc,
             KernelSourcePattern,
             get_explicit_type_name( inputType ),
             (format->image_channel_order == CL_DEPTH) ? "" : "4",
             (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t",
             gTestMipmaps ? ", int lod" : "",
             gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource,
             readFormat,
             gTestMipmaps ? ", lod" : "" );

    ptr = programSrc;
    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
    test_error( error, "Unable to create testing kernel" );

    // Run tests
    if( gTestSmallImages )
    {
        // Exhaustively cover widths 1..12 and heights 1..8 with tightly packed rows
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
            {
                if( gTestMipmaps )
                    imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);

                if( gDebugTrace )
                    log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );

                int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
                if( retCode )
                    return retCode;
            }
        }
    }
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
        size_t numbeOfSizes;
        size_t sizes[100][3];

        get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);

        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
        {
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.height = sizes[ idx ][ 1 ];
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );

            if( gTestMipmaps )
                imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);

            log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.height);

            int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestRounding )
    {
        // One image whose width * height equals the number of values the channel type can hold.
        // NOTE(review): the shift is done in int, so a 4-byte channel type would shift by 32
        // (undefined) - presumably rounding runs only use 8/16-bit channel types; confirm.
        size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 );
        imageInfo.height = typeRange / 256;
        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
        imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );

        int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
        if( retCode )
            return retCode;
    }
    else
    {
        cl_uint imagePitchAlign = 0;
        if (gTestImage2DFromBuffer)
        {
#if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
            error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
#endif
            test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );

            // Fall back to an alignment of one pixel whenever none was obtained (query returned 0,
            // or the query is not compiled in). A zero alignment would make the pitch granule
            // below zero and the row pitch round-up would divide by zero.
            if (!imagePitchAlign)
                imagePitchAlign = 1;
        }

        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;

            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );

                if(gTestMipmaps)
                {
                    imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1, d);
                    size = 4 * compute_mipmapped_image_size(imageInfo);
                }
                else
                {
                    imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
                    if( gEnablePitch )
                    {
                        // Pad each row with a random number of extra pixels
                        size_t extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
                    }

                    // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT
                    if (gTestImage2DFromBuffer)
                    {
                        size_t pitch = imagePitchAlign * get_pixel_size( imageInfo.format );
                        imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
                    }

                    // Multiply in 64 bits so the product cannot wrap on hosts with a 32-bit size_t
                    // and slip past the limit checks below
                    size = (cl_ulong)imageInfo.rowPitch * (cl_ulong)imageInfo.height * 4;
                }
            } while( size > maxAllocSize || ( size * 3 ) > memSize );

            if( gDebugTrace )
                log_info( "   at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );

            int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }

    return 0;
}
// Runs the image write tests for every format in formatList that is not
// filtered out, dispatching to the per-image-type test set.
//
// Parameters:
//   device       - device under test
//   formatList   - candidate image formats
//   filterFlags  - per-format flags; a true entry skips that format
//   numFormats   - number of entries in formatList / filterFlags
//   imageSampler - sampler description; linear filtering makes this a no-op
//   inputType    - element type of the kernel input
//   imageType    - which image object type to test (1D, 2D, 3D, 1D array, 2D array)
//
// Returns the sum of the per-format results (0 when everything passed).
// gTestCount is incremented for each format actually run and gTestFailure for
// each one that returned non-zero.
int test_write_image_formats( cl_device_id device, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
                              image_sampler_data *imageSampler, ExplicitType inputType, cl_mem_object_type imageType )
{
    if( imageSampler->filter_mode == CL_FILTER_LINEAR )
        // No need to run for linear filters
        return 0;

    int ret = 0;

    log_info( "write_image (%s input) *****************************\n", get_explicit_type_name( inputType ) );

    RandomSeed seed( gRandomSeed );

    for( unsigned int i = 0; i < numFormats; i++ )
    {
        cl_image_format &imageFormat = formatList[ i ];

        if( filterFlags[ i ] )
            continue;

        // sRGB formats are only tested when the device advertises sRGB image writes
        if (is_sRGBA_order(imageFormat.image_channel_order))
        {
            if( !is_extension_available( device, "cl_khr_srgb_image_writes" ))
            {
                log_missing_feature( "-----------------------------------------------------\n" );
                log_missing_feature( "WARNING!!! sRGB formats are shown in the supported write-format list.\n");
                log_missing_feature( "However the extension cl_khr_srgb_image_writes is not available.\n");
                log_missing_feature( "Please make sure the extension is officially supported by the device .\n");
                log_missing_feature( "-----------------------------------------------------\n\n" );
                continue;
            }
        }

        gTestCount++;

        print_write_header( &imageFormat, false );

        // Initialized so that an image type without a dedicated case below can never
        // leave the result indeterminate
        int retCode = 0;
        switch (imageType)
        {
            case CL_MEM_OBJECT_IMAGE1D:
                retCode = test_write_image_1D_set( device, &imageFormat, inputType, seed );
                break;
            case CL_MEM_OBJECT_IMAGE2D:
                retCode = test_write_image_set( device, &imageFormat, inputType, seed );
                break;
            case CL_MEM_OBJECT_IMAGE3D:
                retCode = test_write_image_3D_set( device, &imageFormat, inputType, seed );
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                retCode = test_write_image_1D_array_set( device, &imageFormat, inputType, seed );
                break;
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                retCode = test_write_image_2D_array_set( device, &imageFormat, inputType, seed );
                break;
            default:
                // Nothing was tested for this image type; report it as a failure instead of
                // silently counting the format as run
                log_error( "ERROR: Unsupported image type 0x%x for write_image test\n", (unsigned int)imageType );
                retCode = 1;
                break;
        }

        if( retCode != 0 )
        {
            gTestFailure++;
            log_error( "FAILED: " );
            print_write_header( &imageFormat, true );
            log_info( "\n" );
        }

        ret += retCode;
    }

    return ret;
}