mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-23 15:39:03 +00:00
Initial open source release of OpenCL 2.0 CTS.
This commit is contained in:
28
test_conformance/images/kernel_read_write/CMakeLists.txt
Normal file
28
test_conformance/images/kernel_read_write/CMakeLists.txt
Normal file
@@ -0,0 +1,28 @@
|
||||
# Name of this conformance-test module; it also prefixes the source-list variable below.
set(MODULE_NAME IMAGE_STREAMS)

# Sources compiled into the module: the tests local to this directory first,
# then the shared harness sources referenced by relative path.
set(${MODULE_NAME}_SOURCES
    main.cpp
    test_iterations.cpp
    test_loops.cpp
    test_read_1D.cpp
    test_read_1D_array.cpp
    test_read_2D_array.cpp
    test_read_3D.cpp
    test_write_image.cpp
    test_write_1D.cpp
    test_write_1D_array.cpp
    test_write_2D_array.cpp
    test_write_3D.cpp
    ../../../test_common/harness/errorHelpers.c
    ../../../test_common/harness/threadTesting.c
    ../../../test_common/harness/kernelHelpers.c
    ../../../test_common/harness/imageHelpers.cpp
    ../../../test_common/harness/mt19937.c
    ../../../test_common/harness/conversions.c
    ../../../test_common/harness/testHarness.c
    ../../../test_common/harness/typeWrappers.cpp
    ../../../test_common/harness/msvc9.c
)

# Shared build rules for the conformance tests.
# NOTE(review): presumably consumes MODULE_NAME and ${MODULE_NAME}_SOURCES - confirm in CMakeCommon.txt.
include(../../CMakeCommon.txt)
|
||||
|
||||
19
test_conformance/images/kernel_read_write/Jamfile
Normal file
19
test_conformance/images/kernel_read_write/Jamfile
Normal file
@@ -0,0 +1,19 @@
|
||||
# Boost.Build description of the kernel image read/write conformance test.
project
    : requirements
#     <toolset>gcc:<cflags>-xc++
#     <toolset>msvc:<cflags>"/TP"
    ;

# The executable needs every test_read_* / test_write_* translation unit:
# test_loops.cpp declares their entry points extern, so leaving any of them
# out produces unresolved symbols at link time. The list mirrors the test
# sources named in this directory's CMakeLists.txt and Makefile.
exe test_image_streams
    : main.cpp
      test_iterations.cpp
      test_loops.cpp
      test_read_1D.cpp
      test_read_1D_array.cpp
      test_read_2D_array.cpp
      test_read_3D.cpp
      test_write_image.cpp
      test_write_1D.cpp
      test_write_1D_array.cpp
      test_write_2D_array.cpp
      test_write_3D.cpp
    ;

# Install the binary into the per-variant distribution tree.
install dist
    : test_image_streams
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/images/kernel_read_write
      <variant>release:<location>$(DIST)/release/tests/test_conformance/images/kernel_read_write
    ;
|
||||
56
test_conformance/images/kernel_read_write/Makefile
Normal file
56
test_conformance/images/kernel_read_write/Makefile
Normal file
@@ -0,0 +1,56 @@
|
||||
# Makefile for the kernel image read/write conformance test (test_image_streams).
# It links against the OpenCL/OpenGL/GLUT/AppKit frameworks, so it targets macOS.

# Optional ATF support: link the framework and define USE_ATF for the sources.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources local to this directory, followed by the shared harness sources.
SRCS = main.cpp \
       test_iterations.cpp \
       test_loops.cpp \
       test_write_image.cpp \
       test_read_1D.cpp \
       test_read_3D.cpp \
       test_read_1D_array.cpp \
       test_read_2D_array.cpp \
       test_write_1D.cpp \
       test_write_3D.cpp \
       test_write_1D_array.cpp \
       test_write_2D_array.cpp \
       ../../../test_common/harness/errorHelpers.c \
       ../../../test_common/harness/threadTesting.c \
       ../../../test_common/harness/kernelHelpers.c \
       ../../../test_common/harness/imageHelpers.cpp \
       ../../../test_common/harness/conversions.c \
       ../../../test_common/harness/testHarness.c \
       ../../../test_common/harness/mt19937.c \
       ../../../test_common/harness/typeWrappers.cpp

DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK =
HEADERS =
TARGET = test_image_streams
# The harness lives three levels up (see the harness entries in SRCS), not two.
INCLUDE = -I../../../test_common/harness
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
# Both C and C++ sources are built with the C++ driver.
CC = c++
CXX = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c and .cpp source to its object file.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# all and clean name actions, not files; keep make from being fooled by
# a stray file called "all" or "clean".
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target that has no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
651
test_conformance/images/kernel_read_write/main.cpp
Normal file
651
test_conformance/images/kernel_read_write/main.cpp
Normal file
@@ -0,0 +1,651 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#include "../testBase.h"
|
||||
#include "../../../test_common/harness/fpcontrol.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#if defined(__PPC__)
|
||||
// Global variable used to hold the FPU control register state. The FPSCR register can not
|
||||
// be used because not all Power implementations retain or observe the NI (non-IEEE
|
||||
// mode) bit.
|
||||
__thread fpu_control_t fpu_control = 0;
|
||||
#endif
|
||||
|
||||
bool gDebugTrace = false, gExtraValidateInfo = false, gDisableOffsets = false, gTestSmallImages = false, gTestMaxImages = false, gTestRounding = false, gTestImage2DFromBuffer = 0, gTestMipmaps = false;
|
||||
cl_filter_mode gFilterModeToUse = (cl_filter_mode)-1;
|
||||
// Default is CL_MEM_USE_HOST_PTR for the test
|
||||
cl_mem_flags gMemFlagsToUse = CL_MEM_USE_HOST_PTR;
|
||||
bool gUseKernelSamplers = false;
|
||||
int gTypesToTest = 0;
|
||||
cl_addressing_mode gAddressModeToUse = (cl_addressing_mode)-1;
|
||||
int gNormalizedModeToUse = 7;
|
||||
cl_channel_type gChannelTypeToUse = (cl_channel_type)-1;
|
||||
cl_channel_order gChannelOrderToUse = (cl_channel_order)-1;
|
||||
bool gEnablePitch = false;
|
||||
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
|
||||
|
||||
int gtestTypesToRun = 0;
|
||||
cl_command_queue queue;
|
||||
cl_context context;
|
||||
|
||||
#define MAX_ALLOWED_STD_DEVIATION_IN_MB 8.0
|
||||
|
||||
void printUsage( const char *execName )
|
||||
{
|
||||
const char *p = strrchr( execName, '/' );
|
||||
if( p != NULL )
|
||||
execName = p + 1;
|
||||
|
||||
log_info( "Usage: %s [read] [write] [CL_FILTER_LINEAR|CL_FILTER_NEAREST] [no_offsets] [debug_trace] [small_images]\n", execName );
|
||||
log_info( "Where:\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tThe following flags specify what kinds of operations to test. They can be combined; if none are specified, all are tested:\n" );
|
||||
log_info( "\t\tread - Tests reading from an image\n" );
|
||||
log_info( "\t\twrite - Tests writing to an image (can be specified with read to run both; default is both)\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tThe following flags specify the types to test. They can be combined; if none are specified, all are tested:\n" );
|
||||
log_info( "\t\tint - Test integer I/O (read_imagei, write_imagei)\n" );
|
||||
log_info( "\t\tuint - Test unsigned integer I/O (read_imageui, write_imageui)\n" );
|
||||
log_info( "\t\tfloat - Test float I/O (read_imagef, write_imagef)\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tCL_FILTER_LINEAR - Only tests formats with CL_FILTER_LINEAR filtering\n" );
|
||||
log_info( "\tCL_FILTER_NEAREST - Only tests formats with CL_FILTER_NEAREST filtering\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tNORMALIZED - Only tests formats with NORMALIZED coordinates\n" );
|
||||
log_info( "\tUNNORMALIZED - Only tests formats with UNNORMALIZED coordinates\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tCL_ADDRESS_CLAMP - Only tests formats with CL_ADDRESS_CLAMP addressing\n" );
|
||||
log_info( "\tCL_ADDRESS_CLAMP_TO_EDGE - Only tests formats with CL_ADDRESS_CLAMP_TO_EDGE addressing\n" );
|
||||
log_info( "\tCL_ADDRESS_REPEAT - Only tests formats with CL_ADDRESS_REPEAT addressing\n" );
|
||||
log_info( "\tCL_ADDRESS_MIRRORED_REPEAT - Only tests formats with CL_ADDRESS_MIRRORED_REPEAT addressing\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "You may also use appropriate CL_ channel type and ordering constants.\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\t1D - Only test 1D images\n" );
|
||||
log_info( "\t2D - Only test 2D images\n" );
|
||||
log_info( "\t3D - Only test 3D images\n" );
|
||||
log_info( "\t1Darray - Only test 1D image arrays\n" );
|
||||
log_info( "\t2Darray - Only test 2D image arrays\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tlocal_samplers - Use samplers declared in the kernel functions instead of passed in as arguments\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tThe following specify to use the specific flag to allocate images to use in the tests:\n" );
|
||||
log_info( "\t\tCL_MEM_COPY_HOST_PTR\n" );
|
||||
log_info( "\t\tCL_MEM_USE_HOST_PTR (default)\n" );
|
||||
log_info( "\t\tCL_MEM_ALLOC_HOST_PTR\n" );
|
||||
log_info( "\t\tNO_HOST_PTR - Specifies to use none of the above flags\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tThe following modify the types of images tested:\n" );
|
||||
log_info( "\t\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" );
|
||||
log_info( "\t\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" );
|
||||
log_info( "\t\trounding - Runs every format through a single image filled with every possible value for that image format, to verify rounding works properly\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tno_offsets - Disables offsets when testing reads (can be good for diagnosing address repeating/clamping problems)\n" );
|
||||
log_info( "\tdebug_trace - Enables additional debug info logging\n" );
|
||||
log_info( "\textra_validate - Enables additional validation failure debug information\n" );
|
||||
log_info( "\tuse_pitches - Enables row and slice pitches\n" );
|
||||
log_info( "\ttest_mipmaps - Enables mipmapped images\n");
|
||||
}
|
||||
|
||||
extern int test_image_set( cl_device_id device, test_format_set_fn formatTestFn, cl_mem_object_type imageType );
|
||||
|
||||
/** read_write images only support sampler-less read buildt-ins which require special settings
|
||||
* for some global parameters. This pair of functions temporarily overwrite those global parameters
|
||||
* and then recover them after completing a read_write test.
|
||||
*/
|
||||
static void overwrite_global_params_for_read_write_test( bool *tTestMipmaps,
|
||||
bool *tDisableOffsets,
|
||||
bool *tNormalizedModeToUse,
|
||||
cl_filter_mode *tFilterModeToUse)
|
||||
{
|
||||
log_info("Overwrite global settings for read_write image tests. The overwritten values:\n");
|
||||
log_info("gTestMipmaps = false, gDisableOffsets = true, gNormalizedModeToUse = false, gFilterModeToUse = CL_FILTER_NEAREST\n" );
|
||||
// mipmap images only support sampler read built-in while read_write images only support
|
||||
// sampler-less read built-in. Hence we cannot test mipmap for read_write image.
|
||||
*tTestMipmaps = gTestMipmaps;
|
||||
gTestMipmaps = false;
|
||||
|
||||
// Read_write images are read by sampler-less read which does not handle out-of-bound read
|
||||
// It's application responsibility to make sure that the read happens in-bound
|
||||
// Therefore we should not enable offset in testing read_write images because it will cause out-of-bound
|
||||
*tDisableOffsets = gDisableOffsets;
|
||||
gDisableOffsets = true;
|
||||
|
||||
// The sampler-less read image functions behave exactly as the corresponding read image functions
|
||||
|
||||
|
||||
*tNormalizedModeToUse = gNormalizedModeToUse;
|
||||
gNormalizedModeToUse = false;
|
||||
*tFilterModeToUse = gFilterModeToUse;
|
||||
gFilterModeToUse = CL_FILTER_NEAREST;
|
||||
}
|
||||
|
||||
/** Recover the global settings overwritten for read_write tests. This is necessary because
|
||||
* there may be other tests (i.e. read or write) are called together with read_write test.
|
||||
*/
|
||||
static void recover_global_params_from_read_write_test(bool tTestMipmaps,
|
||||
bool tDisableOffsets,
|
||||
bool tNormalizedModeToUse,
|
||||
cl_filter_mode tFilterModeToUse)
|
||||
{
|
||||
gTestMipmaps = tTestMipmaps;
|
||||
gDisableOffsets = tDisableOffsets;
|
||||
gNormalizedModeToUse = tNormalizedModeToUse;
|
||||
gFilterModeToUse = tFilterModeToUse;
|
||||
}
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
cl_platform_id platform;
|
||||
cl_device_id device;
|
||||
cl_channel_type chanType;
|
||||
cl_channel_order chanOrder;
|
||||
char str[ 128 ];
|
||||
int testTypesToRun = 0;
|
||||
int testMethods = 0;
|
||||
bool randomize = false;
|
||||
bool tTestMipMaps = false;
|
||||
bool tDisableOffsets = false;
|
||||
bool tNormalizedModeToUse = false;
|
||||
cl_filter_mode tFilterModeToUse = (cl_filter_mode)-1;
|
||||
|
||||
test_start();
|
||||
|
||||
//Check CL_DEVICE_TYPE environment variable
|
||||
checkDeviceTypeOverride( &gDeviceType );
|
||||
|
||||
// Parse arguments
|
||||
for( int i = 1; i < argc; i++ )
|
||||
{
|
||||
strncpy( str, argv[ i ], sizeof( str ) - 1 );
|
||||
|
||||
if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
gDeviceType = CL_DEVICE_TYPE_CPU;
|
||||
else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
gDeviceType = CL_DEVICE_TYPE_GPU;
|
||||
else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
gDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
gDeviceType = CL_DEVICE_TYPE_DEFAULT;
|
||||
|
||||
else if( strcmp( str, "debug_trace" ) == 0 )
|
||||
gDebugTrace = true;
|
||||
|
||||
else if( strcmp( str, "CL_FILTER_NEAREST" ) == 0 || strcmp( str, "NEAREST" ) == 0 )
|
||||
gFilterModeToUse = CL_FILTER_NEAREST;
|
||||
else if( strcmp( str, "CL_FILTER_LINEAR" ) == 0 || strcmp( str, "LINEAR" ) == 0 )
|
||||
gFilterModeToUse = CL_FILTER_LINEAR;
|
||||
|
||||
else if( strcmp( str, "CL_ADDRESS_NONE" ) == 0 )
|
||||
gAddressModeToUse = CL_ADDRESS_NONE;
|
||||
else if( strcmp( str, "CL_ADDRESS_CLAMP" ) == 0 )
|
||||
gAddressModeToUse = CL_ADDRESS_CLAMP;
|
||||
else if( strcmp( str, "CL_ADDRESS_CLAMP_TO_EDGE" ) == 0 )
|
||||
gAddressModeToUse = CL_ADDRESS_CLAMP_TO_EDGE;
|
||||
else if( strcmp( str, "CL_ADDRESS_REPEAT" ) == 0 )
|
||||
gAddressModeToUse = CL_ADDRESS_REPEAT;
|
||||
else if( strcmp( str, "CL_ADDRESS_MIRRORED_REPEAT" ) == 0 )
|
||||
gAddressModeToUse = CL_ADDRESS_MIRRORED_REPEAT;
|
||||
|
||||
else if( strcmp( str, "NORMALIZED" ) == 0 )
|
||||
gNormalizedModeToUse = true;
|
||||
else if( strcmp( str, "UNNORMALIZED" ) == 0 )
|
||||
gNormalizedModeToUse = false;
|
||||
|
||||
|
||||
else if( strcmp( str, "no_offsets" ) == 0 )
|
||||
gDisableOffsets = true;
|
||||
else if( strcmp( str, "small_images" ) == 0 )
|
||||
gTestSmallImages = true;
|
||||
else if( strcmp( str, "max_images" ) == 0 )
|
||||
gTestMaxImages = true;
|
||||
else if( strcmp( str, "use_pitches" ) == 0 )
|
||||
gEnablePitch = true;
|
||||
else if( strcmp( str, "rounding" ) == 0 )
|
||||
gTestRounding = true;
|
||||
else if( strcmp( str, "extra_validate" ) == 0 )
|
||||
gExtraValidateInfo = true;
|
||||
else if( strcmp( str, "test_mipmaps" ) == 0 ) {
|
||||
// 2.0 Spec does not allow using mem flags, unnormalized coordinates with mipmapped images
|
||||
gTestMipmaps = true;
|
||||
gMemFlagsToUse = 0;
|
||||
gNormalizedModeToUse = true;
|
||||
}
|
||||
|
||||
else if( strcmp( str, "read" ) == 0 )
|
||||
testTypesToRun |= kReadTests;
|
||||
else if( strcmp( str, "write" ) == 0 )
|
||||
testTypesToRun |= kWriteTests;
|
||||
else if( strcmp( str, "read_write" ) == 0 )
|
||||
{
|
||||
testTypesToRun |= kReadWriteTests;
|
||||
}
|
||||
|
||||
else if( strcmp( str, "local_samplers" ) == 0 )
|
||||
gUseKernelSamplers = true;
|
||||
|
||||
else if( strcmp( str, "int" ) == 0 )
|
||||
gTypesToTest |= kTestInt;
|
||||
else if( strcmp( str, "uint" ) == 0 )
|
||||
gTypesToTest |= kTestUInt;
|
||||
else if( strcmp( str, "float" ) == 0 )
|
||||
gTypesToTest |= kTestFloat;
|
||||
|
||||
else if( strcmp( str, "randomize" ) == 0 )
|
||||
randomize = true;
|
||||
|
||||
else if ( strcmp( str, "1D" ) == 0 )
|
||||
testMethods |= k1D;
|
||||
else if( strcmp( str, "2D" ) == 0 )
|
||||
testMethods |= k2D;
|
||||
else if( strcmp( str, "3D" ) == 0 )
|
||||
testMethods |= k3D;
|
||||
else if( strcmp( str, "1Darray" ) == 0 )
|
||||
testMethods |= k1DArray;
|
||||
else if( strcmp( str, "2Darray" ) == 0 )
|
||||
testMethods |= k2DArray;
|
||||
|
||||
else if( strcmp( str, "CL_MEM_COPY_HOST_PTR" ) == 0 || strcmp( str, "COPY_HOST_PTR" ) == 0 )
|
||||
gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
|
||||
else if( strcmp( str, "CL_MEM_USE_HOST_PTR" ) == 0 || strcmp( str, "USE_HOST_PTR" ) == 0 )
|
||||
gMemFlagsToUse = CL_MEM_USE_HOST_PTR;
|
||||
else if( strcmp( str, "CL_MEM_ALLOC_HOST_PTR" ) == 0 || strcmp( str, "ALLOC_HOST_PTR" ) == 0 )
|
||||
gMemFlagsToUse = CL_MEM_ALLOC_HOST_PTR;
|
||||
else if( strcmp( str, "NO_HOST_PTR" ) == 0 )
|
||||
gMemFlagsToUse = 0;
|
||||
|
||||
else if( strcmp( str, "help" ) == 0 || strcmp( str, "?" ) == 0 )
|
||||
{
|
||||
printUsage( argv[ 0 ] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
else if( ( chanType = get_channel_type_from_name( str ) ) != (cl_channel_type)-1 )
|
||||
gChannelTypeToUse = chanType;
|
||||
|
||||
else if( ( chanOrder = get_channel_order_from_name( str ) ) != (cl_channel_order)-1 )
|
||||
gChannelOrderToUse = chanOrder;
|
||||
else
|
||||
{
|
||||
log_error( "ERROR: Unknown argument %d: %s. Exiting....\n", i, str );
|
||||
return -1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (testMethods == 0)
|
||||
testMethods = k1D | k2D | k3D | k1DArray | k2DArray;
|
||||
if( testTypesToRun == 0 )
|
||||
testTypesToRun = kAllTests;
|
||||
if( gTypesToTest == 0 )
|
||||
gTypesToTest = kTestAllTypes;
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#if defined( __i386__ ) || defined( __x86_64__ )
|
||||
#define kHasSSE3 0x00000008
|
||||
#define kHasSupplementalSSE3 0x00000100
|
||||
#define kHasSSE4_1 0x00000400
|
||||
#define kHasSSE4_2 0x00000800
|
||||
/* check our environment for a hint to disable SSE variants */
|
||||
{
|
||||
const char *env = getenv( "CL_MAX_SSE" );
|
||||
if( env )
|
||||
{
|
||||
extern int _cpu_capabilities;
|
||||
int mask = 0;
|
||||
if( 0 == strcmp( env, "SSE4.1" ) )
|
||||
mask = kHasSSE4_2;
|
||||
else if( 0 == strcmp( env, "SSSE3" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1;
|
||||
else if( 0 == strcmp( env, "SSE3" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
|
||||
else if( 0 == strcmp( env, "SSE2" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
|
||||
|
||||
log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
|
||||
_cpu_capabilities &= ~mask;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Seed the random # generators
|
||||
if( randomize )
|
||||
{
|
||||
gRandomSeed = (cl_uint) time( NULL );
|
||||
gReSeed = 1;
|
||||
log_info( "Random seed: %u\n", gRandomSeed );
|
||||
}
|
||||
|
||||
int error;
|
||||
// Get our platform
|
||||
error = clGetPlatformIDs(1, &platform, NULL);
|
||||
if( error )
|
||||
{
|
||||
print_error( error, "Unable to get platform" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Get our device
|
||||
cl_uint num_devices = 0;
|
||||
error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices );
|
||||
if( error )
|
||||
{
|
||||
print_error( error, "Unable to get the number of devices" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<cl_device_id> devices(num_devices);
|
||||
error = clGetDeviceIDs(platform, gDeviceType, num_devices, &devices[0], NULL );
|
||||
if( error )
|
||||
{
|
||||
print_error( error, "Unable to get specified device type" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
int device_index = 0;
|
||||
char* device_index_str = getenv("CL_DEVICE_INDEX");
|
||||
if (device_index_str && ((device_index = atoi(device_index_str))) >= num_devices) {
|
||||
log_error("CL_DEVICE_INDEX=%d is greater than the number of devices %d\n",device_index,num_devices);
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
device = devices[device_index];
|
||||
|
||||
// Get the device type so we know if it is a GPU even if default is passed in.
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(gDeviceType), &gDeviceType, NULL);
|
||||
if( error )
|
||||
{
|
||||
print_error( error, "Unable to get device type" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( printDeviceHeader( device ) != CL_SUCCESS )
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check for image support
|
||||
if(checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) {
|
||||
log_info("Device does not support images. Skipping test.\n");
|
||||
test_finish();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Create a context to test with
|
||||
context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
print_error( error, "Unable to create testing context" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Create a queue against the context
|
||||
queue = clCreateCommandQueueWithProperties( context, device, 0, &error );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
print_error( error, "Unable to create testing command queue" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( gTestSmallImages )
|
||||
log_info( "Note: Using small test images\n" );
|
||||
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not to flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ(&oldMode);
|
||||
|
||||
// Run the test now
|
||||
int ret = 0;
|
||||
if (testMethods & k1D)
|
||||
{
|
||||
if (testTypesToRun & kReadTests)
|
||||
{
|
||||
gtestTypesToRun = kReadTests;
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D );
|
||||
}
|
||||
|
||||
if (testTypesToRun & kWriteTests)
|
||||
{
|
||||
gtestTypesToRun = kWriteTests;
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D );
|
||||
}
|
||||
|
||||
if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps)
|
||||
{
|
||||
gtestTypesToRun = kReadWriteTests;
|
||||
overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse);
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D );
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D );
|
||||
recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse);
|
||||
}
|
||||
}
|
||||
if (testMethods & k2D)
|
||||
{
|
||||
if (testTypesToRun & kReadTests)
|
||||
{
|
||||
gtestTypesToRun = kReadTests;
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D );
|
||||
if (is_extension_available(device, "cl_khr_image2d_from_buffer"))
|
||||
{
|
||||
log_info("Testing read_image{f | i | ui} for 2D image from buffer\n");
|
||||
// NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages, gTestRounding and gTestMipmaps must be false
|
||||
if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false)
|
||||
{
|
||||
cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse;
|
||||
gTestImage2DFromBuffer = true;
|
||||
// disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0
|
||||
gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D );
|
||||
gTestImage2DFromBuffer = false;
|
||||
gMemFlagsToUse = saved_gMemFlagsToUse;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (testTypesToRun & kWriteTests)
|
||||
{
|
||||
gtestTypesToRun = kWriteTests;
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D );
|
||||
if (is_extension_available(device, "cl_khr_image2d_from_buffer"))
|
||||
{
|
||||
log_info("Testing write_image{f | i | ui} for 2D image from buffer\n");
|
||||
// NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages,gTestRounding and gTestMipmaps must be false
|
||||
if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false)
|
||||
{
|
||||
bool saved_gEnablePitch = gEnablePitch;
|
||||
cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse;
|
||||
gEnablePitch = true;
|
||||
// disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0
|
||||
gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
|
||||
gTestImage2DFromBuffer = true;
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D );
|
||||
gTestImage2DFromBuffer = false;
|
||||
gMemFlagsToUse = saved_gMemFlagsToUse;
|
||||
gEnablePitch = saved_gEnablePitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps)
|
||||
{
|
||||
gtestTypesToRun = kReadWriteTests;
|
||||
overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse);
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D );
|
||||
if (is_extension_available(device, "cl_khr_image2d_from_buffer"))
|
||||
{
|
||||
log_info("Testing read_image{f | i | ui} for 2D image from buffer\n");
|
||||
// NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages, gTestRounding and gTestMipmaps must be false
|
||||
if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false)
|
||||
{
|
||||
cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse;
|
||||
gTestImage2DFromBuffer = true;
|
||||
// disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0
|
||||
gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D );
|
||||
gTestImage2DFromBuffer = false;
|
||||
gMemFlagsToUse = saved_gMemFlagsToUse;
|
||||
}
|
||||
}
|
||||
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D );
|
||||
if (is_extension_available(device, "cl_khr_image2d_from_buffer"))
|
||||
{
|
||||
log_info("Testing write_image{f | i | ui} for 2D image from buffer\n");
|
||||
// NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages,gTestRounding and gTestMipmaps must be false
|
||||
if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false)
|
||||
{
|
||||
bool saved_gEnablePitch = gEnablePitch;
|
||||
cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse;
|
||||
gEnablePitch = true;
|
||||
// disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0
|
||||
gMemFlagsToUse = CL_MEM_COPY_HOST_PTR;
|
||||
gTestImage2DFromBuffer = true;
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D );
|
||||
gTestImage2DFromBuffer = false;
|
||||
gMemFlagsToUse = saved_gMemFlagsToUse;
|
||||
gEnablePitch = saved_gEnablePitch;
|
||||
}
|
||||
}
|
||||
recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse);
|
||||
}
|
||||
}
|
||||
if (testMethods & k3D)
|
||||
{
|
||||
if (testTypesToRun & kReadTests)
|
||||
{
|
||||
gtestTypesToRun = kReadTests;
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE3D );
|
||||
}
|
||||
|
||||
if (testTypesToRun & kWriteTests)
|
||||
{
|
||||
gtestTypesToRun = kWriteTests;
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE3D );
|
||||
}
|
||||
|
||||
if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps)
|
||||
{
|
||||
gtestTypesToRun = kReadWriteTests;
|
||||
overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse);
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE3D );
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE3D );
|
||||
recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse);
|
||||
}
|
||||
}
|
||||
if (testMethods & k1DArray)
|
||||
{
|
||||
if (testTypesToRun & kReadTests)
|
||||
{
|
||||
gtestTypesToRun = kReadTests;
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY );
|
||||
}
|
||||
|
||||
if (testTypesToRun & kWriteTests)
|
||||
{
|
||||
gtestTypesToRun = kWriteTests;
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY );
|
||||
}
|
||||
|
||||
if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps)
|
||||
{
|
||||
gtestTypesToRun = kReadWriteTests;
|
||||
overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse);
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY );
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY );
|
||||
recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse);
|
||||
}
|
||||
}
|
||||
if (testMethods & k2DArray)
|
||||
{
|
||||
if (testTypesToRun & kReadTests)
|
||||
{
|
||||
gtestTypesToRun = kReadTests;
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY );
|
||||
}
|
||||
|
||||
if (testTypesToRun & kWriteTests)
|
||||
{
|
||||
gtestTypesToRun = kWriteTests;
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY );
|
||||
}
|
||||
|
||||
if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps)
|
||||
{
|
||||
gtestTypesToRun = kReadWriteTests;
|
||||
overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse);
|
||||
ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY );
|
||||
ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY );
|
||||
recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse);
|
||||
}
|
||||
}
|
||||
|
||||
// Restore FP state before leaving
|
||||
RestoreFPState(&oldMode);
|
||||
|
||||
error = clFinish(queue);
|
||||
if (error)
|
||||
print_error(error, "clFinish failed.");
|
||||
|
||||
clReleaseContext(context);
|
||||
clReleaseCommandQueue(queue);
|
||||
|
||||
if (gTestFailure == 0) {
|
||||
if (gTestCount > 1)
|
||||
log_info("PASSED %d of %d tests.\n", gTestCount, gTestCount);
|
||||
else
|
||||
log_info("PASSED test.\n");
|
||||
} else if (gTestFailure > 0) {
|
||||
if (gTestCount > 1)
|
||||
log_error("FAILED %d of %d tests.\n", gTestFailure, gTestCount);
|
||||
else
|
||||
log_error("FAILED test.\n");
|
||||
}
|
||||
|
||||
// Clean up
|
||||
test_finish();
|
||||
|
||||
if (gTestFailure > 0)
|
||||
return gTestFailure;
|
||||
|
||||
return ret;
|
||||
}
|
||||
1765
test_conformance/images/kernel_read_write/test_iterations.cpp
Normal file
1765
test_conformance/images/kernel_read_write/test_iterations.cpp
Normal file
File diff suppressed because it is too large
Load Diff
466
test_conformance/images/kernel_read_write/test_loops.cpp
Normal file
466
test_conformance/images/kernel_read_write/test_loops.cpp
Normal file
@@ -0,0 +1,466 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../testBase.h"
|
||||
|
||||
extern cl_context context;
|
||||
extern cl_filter_mode gFilterModeToUse;
|
||||
extern cl_addressing_mode gAddressModeToUse;
|
||||
extern int gTypesToTest;
|
||||
extern int gNormalizedModeToUse;
|
||||
extern cl_channel_type gChannelTypeToUse;
|
||||
extern cl_channel_order gChannelOrderToUse;
|
||||
|
||||
extern bool gDebugTrace;
|
||||
extern bool gTestMipmaps;
|
||||
|
||||
extern int gtestTypesToRun;
|
||||
|
||||
extern int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
|
||||
bool floatCoords, ExplicitType outputType );
|
||||
extern int test_read_image_set_2D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
|
||||
bool floatCoords, ExplicitType outputType );
|
||||
extern int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
|
||||
bool floatCoords, ExplicitType outputType );
|
||||
extern int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
|
||||
bool floatCoords, ExplicitType outputType );
|
||||
extern int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
|
||||
bool floatCoords, ExplicitType outputType );
|
||||
|
||||
static const char *str_1d_image = "1D";
|
||||
static const char *str_2d_image = "2D";
|
||||
static const char *str_3d_image = "3D";
|
||||
static const char *str_1d_image_array = "1D array";
|
||||
static const char *str_2d_image_array = "2D array";
|
||||
|
||||
static const char *convert_image_type_to_string(cl_mem_object_type imageType)
|
||||
{
|
||||
const char *p;
|
||||
switch (imageType)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
p = str_1d_image;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
p = str_2d_image;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
p = str_3d_image;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
p = str_1d_image_array;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
p = str_2d_image_array;
|
||||
break;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
// Recomputes the per-format skip flags for one pass over the format list.
//
// formatList:               formats to examine (formatCount entries).
// filterFlags:              parallel array; on return, true means "skip
//                           this format". Every entry is rewritten, so
//                           flags from an earlier pass do not carry over.
// formatCount:              number of entries in both arrays.
// channelDataTypesToFilter: list of acceptable channel data types,
//                           terminated by (cl_channel_type)-1, or NULL to
//                           accept every data type.
// Returns the number of formats left unfiltered.
int filter_formats( cl_image_format *formatList, bool *filterFlags, unsigned int formatCount, cl_channel_type *channelDataTypesToFilter )
{
    int acceptedCount = 0;

    for( unsigned int idx = 0; idx < formatCount; ++idx )
    {
        const cl_image_format &fmt = formatList[ idx ];
        bool &skip = filterFlags[ idx ];

        // Start from a clean slate for this pass.
        skip = false;

        // skip mipmap tests for CL_DEPTH formats (re# Khronos Bug 13762)
        if( gTestMipmaps && ( fmt.image_channel_order == CL_DEPTH ) )
        {
            log_info("Skip mipmap tests for CL_DEPTH format\n");
            skip = true;
            continue;
        }

        // A globally selected channel type / order excludes everything else.
        const bool typeExcluded  = ( gChannelTypeToUse != (cl_channel_type)-1 )
                                   && ( gChannelTypeToUse != fmt.image_channel_data_type );
        const bool orderExcluded = ( gChannelOrderToUse != (cl_channel_order)-1 )
                                   && ( gChannelOrderToUse != fmt.image_channel_order );

        // Formats whose order or data type fails the IsChannel*Supported
        // checks are not tested either.
        if( typeExcluded
            || orderExcluded
            || !IsChannelOrderSupported( fmt.image_channel_order )
            || !IsChannelTypeSupported( fmt.image_channel_data_type ) )
        {
            skip = true;
            continue;
        }

        // No data-type list supplied: everything that got this far counts.
        if( channelDataTypesToFilter == NULL )
        {
            ++acceptedCount;
            continue;
        }

        // Look for this format's data type in the sentinel-terminated list.
        bool listed = false;
        for( const cl_channel_type *candidate = channelDataTypesToFilter;
             *candidate != (cl_channel_type)-1;
             ++candidate )
        {
            if( *candidate == fmt.image_channel_data_type )
            {
                listed = true;
                break;
            }
        }

        if( listed )
            ++acceptedCount;
        else
            skip = true;   // data type is not part of this pass
    }

    return acceptedCount;
}
|
||||
|
||||
// Queries the image formats the global context supports for the given
// image type and memory flags.
//
// device:         unused here; the query goes through the global context.
// imageType:      CL_MEM_OBJECT_IMAGE* type to query.
// outFormatList:  receives an array allocated with new[]; on success the
//                 caller owns it (it may have zero elements). On failure
//                 it is set to NULL and nothing needs to be freed.
// outFormatCount: receives the number of entries in outFormatList
//                 (0 on failure).
// flags:          memory flags the formats must be usable with.
// Returns 0 on success, or the OpenCL error code of the failing query.
int get_format_list( cl_device_id device, cl_mem_object_type imageType, cl_image_format * &outFormatList, unsigned int &outFormatCount, cl_mem_flags flags )
{
    int error;

    // Never leave the caller with stale values on an early return.
    outFormatList = NULL;
    outFormatCount = 0;

    // Count-only query: no scratch buffer is needed to learn the size.
    error = clGetSupportedImageFormats( context, flags,
                                        imageType, 0, NULL, &outFormatCount );
    test_error( error, "Unable to get count of supported image formats" );

    outFormatList = new cl_image_format[ outFormatCount ];

    // Passing num_entries == 0 together with a non-NULL list is rejected by
    // the API, so an empty result is returned as-is without a second call.
    if( outFormatCount == 0 )
        return 0;

    error = clGetSupportedImageFormats( context, flags,
                                        imageType, outFormatCount, outFormatList, NULL );
    if( error != CL_SUCCESS )
    {
        // Release the list before reporting the failure; it used to leak here.
        delete [] outFormatList;
        outFormatList = NULL;
        outFormatCount = 0;
    }
    test_error( error, "Unable to get list of supported image formats" );

    return 0;
}
|
||||
|
||||
// Runs the read test that matches imageType and returns its result.
// Image types without a matching test are not run and report 0.
static int run_read_image_set_for_type( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler,
                                        bool floatCoords, ExplicitType outputType, cl_mem_object_type imageType )
{
    if( imageType == CL_MEM_OBJECT_IMAGE1D )
        return test_read_image_set_1D( device, format, imageSampler, floatCoords, outputType );

    if( imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY )
        return test_read_image_set_1D_array( device, format, imageSampler, floatCoords, outputType );

    if( imageType == CL_MEM_OBJECT_IMAGE2D )
        return test_read_image_set_2D( device, format, imageSampler, floatCoords, outputType );

    if( imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY )
        return test_read_image_set_2D_array( device, format, imageSampler, floatCoords, outputType );

    if( imageType == CL_MEM_OBJECT_IMAGE3D )
        return test_read_image_set_3D( device, format, imageSampler, floatCoords, outputType );

    return 0;
}

// Runs the read test for one image format across every applicable
// addressing mode.
//
// device:       device under test.
// format:       image format to test.
// floatCoords:  true to read with float coordinates, false for integer.
// imageSampler: sampler description; its addressing_mode is overwritten
//               for each iteration.
// outputType:   type the kernel reads into.
// imageType:    CL_MEM_OBJECT_IMAGE* type selecting the concrete test.
// Returns the bitwise OR of the individual test results (0 = all passed).
// gTestCount / gTestFailure are updated as tests run.
int test_read_image_type( cl_device_id device, cl_image_format *format, bool floatCoords,
                          image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
{
    // The sampler-less read image functions behave exactly as the corresponding read image functions
    // described in section 6.13.14.2 that take integer coordinates and a sampler with filter mode set to
    // CLK_FILTER_NEAREST, normalized coordinates set to CLK_NORMALIZED_COORDS_FALSE and addressing mode to CLK_ADDRESS_NONE
    cl_addressing_mode samplerlessModes[] = { CL_ADDRESS_NONE, (cl_addressing_mode)-1 };
    cl_addressing_mode samplerModes[] = { CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT, (cl_addressing_mode)-1 };

    // Read-write runs only use CL_ADDRESS_NONE; everything else walks the
    // full sampler list. Both lists end with a (cl_addressing_mode)-1 sentinel.
    const cl_addressing_mode *modeList = samplerModes;
    if( gtestTypesToRun & kReadWriteTests )
        modeList = samplerlessModes;

#if defined( __APPLE__ )
    // According to the OpenCL specification, we do not guarantee the precision
    // of operations for linear filtering on the GPU.  We do not test linear
    // filtering for the CL_RGB CL_UNORM_INT_101010 image format; however, we
    // test it internally for a set of other image formats.
    if ((gDeviceType == CL_DEVICE_TYPE_GPU) &&
        (imageSampler->filter_mode == CL_FILTER_LINEAR) &&
        (format->image_channel_order == CL_RGB) &&
        (format->image_channel_data_type == CL_UNORM_INT_101010))
    {
        log_info("--- Skipping CL_RGB CL_UNORM_INT_101010 format with CL_FILTER_LINEAR on GPU.\n");
        return 0;
    }
#endif

    int combinedResult = 0;

    for( const cl_addressing_mode *mode = modeList; *mode != (cl_addressing_mode)-1; ++mode )
    {
        imageSampler->addressing_mode = *mode;

        // Repeat modes don't make sense for non-normalized coords
        const bool repeats = ( *mode == CL_ADDRESS_REPEAT ) || ( *mode == CL_ADDRESS_MIRRORED_REPEAT );
        if( repeats && !( imageSampler->normalized_coords ) )
            continue;

        // Honor a globally requested addressing mode, if one was selected
        if( gAddressModeToUse != (cl_addressing_mode)-1 && imageSampler->addressing_mode != gAddressModeToUse )
            continue;

        print_read_header( format, imageSampler, false );

        gTestCount++;

        const int status = run_read_image_set_for_type( device, format, imageSampler, floatCoords, outputType, imageType );
        if( status != 0 )
        {
            gTestFailure++;
            log_error( "FAILED: " );
            print_read_header( format, imageSampler, true );
            log_info( "\n" );
        }

        combinedResult |= status;
    }

    return combinedResult;
}
|
||||
|
||||
// Runs the read tests for every unfiltered format, over each valid
// combination of normalized / unnormalized and float / integer coordinates.
//
// device:       device under test.
// formatList:   candidate formats (numFormats entries).
// filterFlags:  parallel array; entries set to true are skipped.
// numFormats:   number of entries in both arrays.
// imageSampler: sampler description; filter_mode is read, and
//               normalized_coords is overwritten for each outer pass.
// outputType:   type the kernel reads into.
// imageType:    CL_MEM_OBJECT_IMAGE* type to test.
// Returns the bitwise OR of all test_read_image_type() results.
int test_read_image_formats( cl_device_id device, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
                             image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
{
    // Honor a globally requested filter mode: nothing to do for any other mode
    if( gFilterModeToUse != (cl_filter_mode)-1 && imageSampler->filter_mode != gFilterModeToUse )
        return 0;

    int combinedResult = 0;

    // Outer pass: unnormalized coordinates first, then normalized
    for( int normalizedPass = 0; normalizedPass < 2; ++normalizedPass )
    {
        const bool useNormalized = ( normalizedPass == 1 );
        imageSampler->normalized_coords = useNormalized;

        // Any value other than 7 restricts the run to the matching mode
        if( gNormalizedModeToUse != 7 && gNormalizedModeToUse != (int)imageSampler->normalized_coords )
            continue;

        // Inner pass: integer coordinates first, then float
        for( int coordPass = 0; coordPass < 2; ++coordPass )
        {
            const bool useFloatCoords = ( coordPass == 1 );

            // Checks added in because this case was leaking through causing a crash on CPU:
            // integer coords can only be used with nearest filtering, and
            // normalized integer coords make no sense (they'd all be zero)
            if( !useFloatCoords
                && ( imageSampler->filter_mode != CL_FILTER_NEAREST || useNormalized ) )
                continue;

            // sampler-less read in read_write tests run only integer coord
            if( useFloatCoords && ( gtestTypesToRun & kReadWriteTests ) )
                continue;

            const char *coordLabel = "integer";
            if( useFloatCoords )
                coordLabel = imageSampler->normalized_coords ? "normalized float" : "unnormalized float";

            log_info( "read_image (%s coords, %s results) *****************************\n",
                      coordLabel,
                      get_explicit_type_name( outputType ) );

            for( unsigned int fmtIdx = 0; fmtIdx < numFormats; ++fmtIdx )
            {
                if( filterFlags[ fmtIdx ] )
                    continue;

                combinedResult |= test_read_image_type( device, &formatList[ fmtIdx ], useFloatCoords,
                                                        imageSampler, outputType, imageType );
            }
        }
    }

    return combinedResult;
}
|
||||
|
||||
|
||||
// Runs one family of image tests (read or write, chosen by formatTestFn)
// for one image type, over float, signed-int and unsigned-int channel
// formats as selected by gTypesToTest.
//
// device:       device under test.
// formatTestFn: test driver to call; compared against
//               test_read_image_formats / test_write_image_formats to
//               decide which format list to query.
// imageType:    CL_MEM_OBJECT_IMAGE* type to test.
// Returns the sum of the driver results, 0 when the set is skipped, or -1
// when a prerequisite fails.
int test_image_set( cl_device_id device, test_format_set_fn formatTestFn, cl_mem_object_type imageType )
{
    // Remembers which (image type, direction) list was printed last so the
    // same list is not printed twice in a row.
    static int printedFormatList = -1;

    const bool isReadSet  = ( formatTestFn == test_read_image_formats );
    const bool isWriteSet = ( formatTestFn == test_write_image_formats );

    // Writing 3D images without cl_khr_3d_image_writes is reported as a failure.
    const int have3DWrites = is_extension_available( device, "cl_khr_3d_image_writes" );
    if( ( have3DWrites == 0 ) && ( imageType == CL_MEM_OBJECT_IMAGE3D ) && isWriteSet )
    {
        gTestFailure++;
        log_error( "-----------------------------------------------------\n" );
        log_error( "FAILED: test writing CL_MEM_OBJECT_IMAGE3D images\n" );
        log_error( "This device does not support the mandated extension cl_khr_3d_image_writes.\n");
        log_error( "-----------------------------------------------------\n\n" );
        return -1;
    }

    // Mipmap runs are skipped (not failed) when the extensions are missing.
    if( gTestMipmaps )
    {
        if( is_extension_available( device, "cl_khr_mipmap_image" ) == 0 )
        {
            log_info( "-----------------------------------------------------\n" );
            log_info( "This device does not support cl_khr_mipmap_image.\nSkipping mipmapped image test. \n" );
            log_info( "-----------------------------------------------------\n\n" );
            return 0;
        }

        const int haveMipmapWrites = is_extension_available( device, "cl_khr_mipmap_image_writes" );
        if( ( haveMipmapWrites == 0 ) && isWriteSet )
        {
            log_info( "-----------------------------------------------------\n" );
            log_info( "This device does not support cl_khr_mipmap_image_writes.\nSkipping mipmapped image write test. \n" );
            log_info( "-----------------------------------------------------\n\n" );
            return 0;
        }
    }

    // 1D, 1D-array and 2D-array images are checked against OpenCL 1.2.
    // NOTE(review): the cases carry no break; test_missing_feature is
    // presumably expected to return from this function when version_check
    // is non-zero -- confirm against the macro definition.
    int version_check = check_opencl_version(device,1,2);
    if (version_check != 0) {
        switch (imageType) {
            case CL_MEM_OBJECT_IMAGE1D:
                test_missing_feature(version_check, "image_1D");
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                test_missing_feature(version_check, "image_1D_array");
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                test_missing_feature(version_check, "image_2D_array");
        }
    }

    // This flag is only for querying the list of supported formats
    // The flag for creating image will be set explicitly in test functions
    cl_mem_flags flags = CL_MEM_KERNEL_READ_AND_WRITE;
    const char *flagNames = "read_write";
    if( isReadSet )
    {
        if( gtestTypesToRun & kReadTests )
        {
            flags = CL_MEM_READ_ONLY;
            flagNames = "read";
        }
    }
    else if( gtestTypesToRun & kWriteTests )
    {
        flags = CL_MEM_WRITE_ONLY;
        flagNames = "write";
    }

    // Grab the list of supported image formats
    cl_image_format *formatList = NULL;
    unsigned int numFormats;
    if( get_format_list( device, imageType, formatList, numFormats, flags ) )
        return -1;
    BufferOwningPtr<cl_image_format> formatListBuf(formatList);

    // One skip flag per format, all initially cleared
    bool *filterFlags = new bool[ numFormats ];
    if( filterFlags == NULL )
    {
        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
        return -1;
    }
    BufferOwningPtr<bool> filterFlagsBuf(filterFlags);
    memset( filterFlags, 0, sizeof( bool ) * numFormats );

    // First time through, we'll go ahead and print the formats supported, regardless of type
    const int listKey = imageType | ( isReadSet ? (1 << 16) : (1 << 17) );
    if( printedFormatList != listKey )
    {
        log_info( "---- Supported %s %s formats for this device ---- \n", convert_image_type_to_string(imageType), flagNames );
        for( unsigned int f = 0; f < numFormats; f++ )
        {
            cl_image_format *entry = &formatList[ f ];

            if( !IsChannelOrderSupported( entry->image_channel_order )
                || !IsChannelTypeSupported( entry->image_channel_data_type ) )
                continue;

            log_info( " %-7s %-24s %d\n", GetChannelOrderName( entry->image_channel_order ),
                      GetChannelTypeName( entry->image_channel_data_type ),
                      (int)get_format_channel_count( entry ) );
        }
        log_info( "------------------------------------------- \n" );
        printedFormatList = listKey;
    }

    image_sampler_data imageSampler;
    int ret = 0;

    /////// float tests ///////
    if( gTypesToTest & kTestFloat )
    {
        // NOTE(review): the guard below is spelled OBSOLETE_FORAMT; confirm
        // whether OBSOLETE_FORMAT was intended before renaming it.
        cl_channel_type floatFormats[] = { CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,
#ifdef OBSOLETE_FORAMT
            CL_UNORM_SHORT_565_REV, CL_UNORM_SHORT_555_REV, CL_UNORM_INT_8888, CL_UNORM_INT_8888_REV, CL_UNORM_INT_101010_REV,
#endif
#ifdef CL_SFIXED14_APPLE
            CL_SFIXED14_APPLE,
#endif
            CL_UNORM_INT8, CL_SNORM_INT8,
            CL_UNORM_INT16, CL_SNORM_INT16, CL_FLOAT, CL_HALF_FLOAT, (cl_channel_type)-1 };

        if( filter_formats( formatList, filterFlags, numFormats, floatFormats ) != 0 )
        {
            // Float results are tested with both filter modes, nearest first
            const cl_filter_mode floatFilterModes[ 2 ] = { CL_FILTER_NEAREST, CL_FILTER_LINEAR };
            for( int m = 0; m < 2; ++m )
            {
                imageSampler.filter_mode = floatFilterModes[ m ];
                ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kFloat, imageType );
            }
        }
        else
        {
            log_info( "No formats supported for float type\n" );
        }
    }

    /////// int tests ///////
    if( gTypesToTest & kTestInt )
    {
        cl_channel_type intFormats[] = { CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, (cl_channel_type)-1 };

        if( filter_formats( formatList, filterFlags, numFormats, intFormats ) != 0 )
        {
            // Only filter mode we support on int is nearest
            imageSampler.filter_mode = CL_FILTER_NEAREST;
            ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kInt, imageType );
        }
        else
        {
            log_info( "No formats supported for integer type\n" );
        }
    }

    /////// uint tests ///////
    if( gTypesToTest & kTestUInt )
    {
        cl_channel_type uintFormats[] = { CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, (cl_channel_type)-1 };

        if( filter_formats( formatList, filterFlags, numFormats, uintFormats ) != 0 )
        {
            // Only filter mode we support on uint is nearest
            imageSampler.filter_mode = CL_FILTER_NEAREST;
            ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kUInt, imageType );
        }
        else
        {
            log_info( "No formats supported for unsigned int type\n" );
        }
    }

    return ret;
}
|
||||
1169
test_conformance/images/kernel_read_write/test_read_1D.cpp
Normal file
1169
test_conformance/images/kernel_read_write/test_read_1D.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1284
test_conformance/images/kernel_read_write/test_read_1D_array.cpp
Normal file
1284
test_conformance/images/kernel_read_write/test_read_1D_array.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1502
test_conformance/images/kernel_read_write/test_read_2D_array.cpp
Normal file
1502
test_conformance/images/kernel_read_write/test_read_2D_array.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1328
test_conformance/images/kernel_read_write/test_read_3D.cpp
Normal file
1328
test_conformance/images/kernel_read_write/test_read_3D.cpp
Normal file
File diff suppressed because it is too large
Load Diff
696
test_conformance/images/kernel_read_write/test_write_1D.cpp
Normal file
696
test_conformance/images/kernel_read_write/test_write_1D.cpp
Normal file
@@ -0,0 +1,696 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../testBase.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#define MAX_ERR 0.005f
|
||||
|
||||
extern cl_command_queue queue;
|
||||
extern cl_context context;
|
||||
extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps;
|
||||
extern cl_filter_mode gFilterModeToSkip;
|
||||
extern cl_mem_flags gMemFlagsToUse;
|
||||
|
||||
extern int gtestTypesToRun;
|
||||
|
||||
const char *readwrite1DKernelSourcePattern =
|
||||
"__kernel void sample_kernel( __global %s4 *input, read_write image1d_t output %s)\n"
|
||||
"{\n"
|
||||
" int tidX = get_global_id(0);\n"
|
||||
" int offset = tidX;\n"
|
||||
" write_image%s( output, tidX %s, input[ offset ]);\n"
|
||||
"}";
|
||||
|
||||
const char *write1DKernelSourcePattern =
|
||||
"__kernel void sample_kernel( __global %s4 *input, write_only image1d_t output %s)\n"
|
||||
"{\n"
|
||||
" int tidX = get_global_id(0);\n"
|
||||
" int offset = tidX;\n"
|
||||
" write_image%s( output, tidX %s, input[ offset ]);\n"
|
||||
"}";
|
||||
|
||||
int test_write_image_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
|
||||
image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
|
||||
{
|
||||
int totalErrors = 0;
|
||||
size_t num_flags = 0;
|
||||
const cl_mem_flags *mem_flag_types = NULL;
|
||||
const char * *mem_flag_names = NULL;
|
||||
const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
|
||||
const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
|
||||
const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
|
||||
const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
|
||||
|
||||
if(gtestTypesToRun & kWriteTests)
|
||||
{
|
||||
mem_flag_types = write_only_mem_flag_types;
|
||||
mem_flag_names = write_only_mem_flag_names;
|
||||
num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
|
||||
}
|
||||
else
|
||||
{
|
||||
mem_flag_types = read_write_mem_flag_types;
|
||||
mem_flag_names = read_write_mem_flag_names;
|
||||
num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
|
||||
}
|
||||
for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
|
||||
{
|
||||
int error;
|
||||
size_t threads[2];
|
||||
bool verifyRounding = false;
|
||||
int totalErrors = 0;
|
||||
int forceCorrectlyRoundedWrites = 0;
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
// Require Apple's CPU implementation to be correctly rounded, not just within 0.6
|
||||
cl_device_type type = 0;
|
||||
if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
|
||||
{
|
||||
log_error("Error: Could not get device type for Apple device! (%d) \n", error );
|
||||
return 1;
|
||||
}
|
||||
if( type == CL_DEVICE_TYPE_CPU )
|
||||
forceCorrectlyRoundedWrites = 1;
|
||||
#endif
|
||||
|
||||
if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
|
||||
if( DetectFloatToHalfRoundingMode(queue) )
|
||||
return 1;
|
||||
|
||||
BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
|
||||
|
||||
create_random_image_data( inputType, imageInfo, imageValues, d );
|
||||
|
||||
if(!gTestMipmaps)
|
||||
{
|
||||
if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
|
||||
{
|
||||
/* Pilot data for sRGB images */
|
||||
if(is_sRGBA_order(imageInfo->format->image_channel_order))
|
||||
{
|
||||
// We want to generate ints (mostly) in range of the target format which should be [0,255]
|
||||
// However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
|
||||
// it can test some out-of-range data points
|
||||
const unsigned int test_range_ext = 16;
|
||||
int formatMin = 0 - test_range_ext;
|
||||
int formatMax = 255 + test_range_ext;
|
||||
int pixel_value = 0;
|
||||
float *inputValues = NULL;
|
||||
|
||||
// First, fill with arbitrary floats
|
||||
{
|
||||
inputValues = (float *)(char*)imageValues;
|
||||
for( size_t i = 0; i < imageInfo->width * 4; i++ )
|
||||
{
|
||||
pixel_value = random_in_range( formatMin, (int)formatMax, d );
|
||||
inputValues[ i ] = (float)(pixel_value/255.0f);
|
||||
}
|
||||
}
|
||||
|
||||
// Throw a few extra test values in there
|
||||
inputValues = (float *)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
|
||||
// Piloting some debug inputs.
|
||||
inputValues[ i++ ] = -0.5f;
|
||||
inputValues[ i++ ] = 0.5f;
|
||||
inputValues[ i++ ] = 2.f;
|
||||
inputValues[ i++ ] = 0.5f;
|
||||
|
||||
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
|
||||
// is correct
|
||||
if( imageInfo->width > 12 )
|
||||
{
|
||||
float formatMax = (float)get_format_max_int( imageInfo->format );
|
||||
inputValues[ i++ ] = 4.0f / formatMax;
|
||||
inputValues[ i++ ] = 4.3f / formatMax;
|
||||
inputValues[ i++ ] = 4.5f / formatMax;
|
||||
inputValues[ i++ ] = 4.7f / formatMax;
|
||||
inputValues[ i++ ] = 5.0f / formatMax;
|
||||
inputValues[ i++ ] = 5.3f / formatMax;
|
||||
inputValues[ i++ ] = 5.5f / formatMax;
|
||||
inputValues[ i++ ] = 5.7f / formatMax;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// First, fill with arbitrary floats
|
||||
{
|
||||
float *inputValues = (float *)(char*)imageValues;
|
||||
for( size_t i = 0; i < imageInfo->width * 4; i++ )
|
||||
inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
|
||||
}
|
||||
|
||||
// Throw a few extra test values in there
|
||||
float *inputValues = (float *)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
inputValues[ i++ ] = -0.0000000000009f;
|
||||
inputValues[ i++ ] = 1.f;
|
||||
inputValues[ i++ ] = -1.f;
|
||||
inputValues[ i++ ] = 2.f;
|
||||
|
||||
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
|
||||
// is correct
|
||||
if( imageInfo->width > 12 )
|
||||
{
|
||||
float formatMax = (float)get_format_max_int( imageInfo->format );
|
||||
inputValues[ i++ ] = 4.0f / formatMax;
|
||||
inputValues[ i++ ] = 4.3f / formatMax;
|
||||
inputValues[ i++ ] = 4.5f / formatMax;
|
||||
inputValues[ i++ ] = 4.7f / formatMax;
|
||||
inputValues[ i++ ] = 5.0f / formatMax;
|
||||
inputValues[ i++ ] = 5.3f / formatMax;
|
||||
inputValues[ i++ ] = 5.5f / formatMax;
|
||||
inputValues[ i++ ] = 5.7f / formatMax;
|
||||
verifyRounding = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( inputType == kUInt )
|
||||
{
|
||||
unsigned int *inputValues = (unsigned int*)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
inputValues[ i++ ] = 0;
|
||||
inputValues[ i++ ] = 65535;
|
||||
inputValues[ i++ ] = 7271820;
|
||||
inputValues[ i++ ] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Construct testing sources
|
||||
clProtectedImage protImage;
|
||||
clMemWrapper unprotImage;
|
||||
cl_mem image;
|
||||
|
||||
if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
|
||||
{
|
||||
// clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
|
||||
// Do not use protected images for max image size test since it rounds the row size to a page size
|
||||
if (gTestMaxImages) {
|
||||
create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
|
||||
|
||||
unprotImage = create_image_1d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
|
||||
imageInfo->width, 0,
|
||||
maxImageUseHostPtrBackingStore, NULL, &error );
|
||||
} else {
|
||||
error = protImage.Create( context, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width );
|
||||
}
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create 1D image of size %ld pitch %ld (%s, %s)\n", imageInfo->width,
|
||||
imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
|
||||
if (gTestMaxImages)
|
||||
image = (cl_mem)unprotImage;
|
||||
else
|
||||
image = (cl_mem)protImage;
|
||||
}
|
||||
else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
|
||||
{
|
||||
// Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
|
||||
// it works just as if no flag is specified, so we just do the same thing either way
|
||||
// Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
|
||||
if( gTestMipmaps )
|
||||
{
|
||||
cl_image_desc image_desc = {0};
|
||||
image_desc.image_type = imageInfo->type;
|
||||
image_desc.num_mip_levels = imageInfo->num_mip_levels;
|
||||
image_desc.image_width = imageInfo->width;
|
||||
image_desc.image_array_size = imageInfo->arraySize;
|
||||
|
||||
unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
|
||||
imageInfo->format, &image_desc, NULL, &error);
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create %d level 1D image of size %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width,
|
||||
IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unprotImage = create_image_1d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
|
||||
imageInfo->width, 0,
|
||||
imageValues, NULL, &error );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create 1D image of size %ld pitch %ld (%s, %s)\n", imageInfo->width,
|
||||
imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
image = unprotImage;
|
||||
}
|
||||
|
||||
error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
size_t width_lod = imageInfo->width, nextLevelOffset = 0;
|
||||
size_t origin[ 3 ] = { 0, 0, 0 };
|
||||
size_t region[ 3 ] = { imageInfo->width, 1, 1 };
|
||||
size_t resultSize;
|
||||
|
||||
for( int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
|
||||
{
|
||||
if(gTestMipmaps)
|
||||
{
|
||||
error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
|
||||
}
|
||||
|
||||
clMemWrapper inputStream;
|
||||
|
||||
char *imagePtrOffset = imageValues + nextLevelOffset;
|
||||
inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
|
||||
get_explicit_type_size( inputType ) * 4 * width_lod, imagePtrOffset, &error );
|
||||
test_error( error, "Unable to create input buffer" );
|
||||
|
||||
// Set arguments
|
||||
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
// Run the kernel
|
||||
threads[0] = (size_t)width_lod;
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "Unable to run kernel" );
|
||||
|
||||
// Get results
|
||||
if( gTestMipmaps )
|
||||
resultSize = width_lod * get_pixel_size( imageInfo->format );
|
||||
else
|
||||
resultSize = imageInfo->rowPitch;
|
||||
clProtectedArray PA(resultSize);
|
||||
char *resultValues = (char *)((void *)PA);
|
||||
|
||||
if( gDebugTrace )
|
||||
log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
|
||||
|
||||
origin[ 1 ] = lod;
|
||||
region[ 0 ] = width_lod;
|
||||
|
||||
error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, 0, resultValues, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results from kernel" );
|
||||
if( gDebugTrace )
|
||||
log_info( " results read\n" );
|
||||
|
||||
// Validate results element by element
|
||||
char *imagePtr = imageValues + nextLevelOffset;
|
||||
int numTries = 5;
|
||||
{
|
||||
char *resultPtr = (char *)resultValues;
|
||||
for( size_t x = 0, i = 0; x < width_lod; x++, i++ )
|
||||
{
|
||||
char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
|
||||
|
||||
// Convert this pixel
|
||||
if( inputType == kFloat )
|
||||
pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
|
||||
else if( inputType == kInt )
|
||||
pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
|
||||
else // if( inputType == kUInt )
|
||||
pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
|
||||
|
||||
// Compare against the results
|
||||
if(is_sRGBA_order(imageInfo->format->image_channel_order))
|
||||
{
|
||||
// Compare sRGB-mapped values
|
||||
cl_float expected[4] = {0};
|
||||
cl_float* input_values = (float*)imagePtr;
|
||||
cl_uchar *actual = (cl_uchar*)resultPtr;
|
||||
float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
|
||||
float err[4] = {0.0f};
|
||||
|
||||
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
{
|
||||
if(j < 3)
|
||||
{
|
||||
expected[j] = sRGBmap(input_values[j]);
|
||||
}
|
||||
else // there is no sRGB conversion for alpha component if it exists
|
||||
{
|
||||
expected[j] = NORMALIZE(input_values[j], 255.0f);
|
||||
}
|
||||
|
||||
err[j] = fabsf( expected[ j ] - actual[ j ] );
|
||||
}
|
||||
|
||||
if ((err[0] > max_err) ||
|
||||
(err[1] > max_err) ||
|
||||
(err[2] > max_err) ||
|
||||
(err[3] > 0)) // there is no conversion for alpha so the error should be zero
|
||||
{
|
||||
log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
|
||||
log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
|
||||
log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
|
||||
log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
|
||||
{
|
||||
// Compare floats
|
||||
float *expected = (float *)resultBuffer;
|
||||
float *actual = (float *)resultPtr;
|
||||
float err = 0.f;
|
||||
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
|
||||
|
||||
err /= (float)get_format_channel_count( imageInfo->format );
|
||||
if( err > MAX_ERR )
|
||||
{
|
||||
unsigned int *e = (unsigned int *)expected;
|
||||
unsigned int *a = (unsigned int *)actual;
|
||||
log_error( "ERROR: Sample %ld (%ld) did not validate! (%s)\n", i, x, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Error: %g\n", err );
|
||||
log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
|
||||
log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
|
||||
log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
|
||||
log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
|
||||
totalErrors++;
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
|
||||
{
|
||||
// Compare half floats
|
||||
if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
|
||||
{
|
||||
cl_ushort *e = (cl_ushort *)resultBuffer;
|
||||
cl_ushort *a = (cl_ushort *)resultPtr;
|
||||
int err_cnt = 0;
|
||||
|
||||
//Fix up cases where we have NaNs
|
||||
for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
{
|
||||
if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
|
||||
continue;
|
||||
if( e[j] != a[j] )
|
||||
err_cnt++;
|
||||
}
|
||||
|
||||
if( err_cnt )
|
||||
{
|
||||
totalErrors++;
|
||||
log_error( "ERROR: Sample %ld (%ld) did not validate! (%s)\n", i, x, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
|
||||
log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
|
||||
if( inputType == kFloat )
|
||||
{
|
||||
float *p = (float *)(char *)imagePtr;
|
||||
log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
}
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Exact result passes every time
|
||||
if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
|
||||
{
|
||||
// result is inexact. Calculate error
|
||||
int failure = 1;
|
||||
float errors[4] = {NAN, NAN, NAN, NAN};
|
||||
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
|
||||
|
||||
// We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
|
||||
if( 0 == forceCorrectlyRoundedWrites &&
|
||||
(
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
|
||||
imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
|
||||
imageInfo->format->image_channel_data_type == CL_SNORM_INT16
|
||||
))
|
||||
{
|
||||
if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
|
||||
! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
|
||||
failure = 0;
|
||||
}
|
||||
|
||||
|
||||
if( failure )
|
||||
{
|
||||
totalErrors++;
|
||||
// Is it our special rounding test?
|
||||
if( verifyRounding && i >= 1 && i <= 2 )
|
||||
{
|
||||
// Try to guess what the rounding mode of the device really is based on what it returned
|
||||
const char *deviceRounding = "unknown";
|
||||
unsigned int deviceResults[8];
|
||||
read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod);
|
||||
read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
|
||||
|
||||
if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
|
||||
deviceRounding = "truncate";
|
||||
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
|
||||
deviceRounding = "round to nearest";
|
||||
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
|
||||
deviceRounding = "round to even";
|
||||
|
||||
log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
|
||||
deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
|
||||
log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
|
||||
return 1;
|
||||
}
|
||||
log_error( "ERROR: Sample %d (%d) did not validate!\n", (int)i, (int)x );
|
||||
switch(imageInfo->format->image_channel_data_type)
|
||||
{
|
||||
case CL_UNORM_INT8:
|
||||
case CL_SNORM_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
|
||||
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_UNORM_INT16:
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_SIGNED_INT16:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
|
||||
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_HALF_FLOAT:
|
||||
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
|
||||
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_SIGNED_INT32:
|
||||
log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
|
||||
break;
|
||||
case CL_FLOAT:
|
||||
log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
|
||||
log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
|
||||
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
}
|
||||
|
||||
float *v = (float *)(char *)imagePtr;
|
||||
log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
imagePtr += get_explicit_type_size( inputType ) * 4;
|
||||
resultPtr += get_pixel_size( imageInfo->format );
|
||||
}
|
||||
}
|
||||
{
|
||||
nextLevelOffset += width_lod * get_pixel_size( imageInfo->format );
|
||||
width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// All done!
|
||||
return totalErrors;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Builds the 1D image write kernel for one image format / input type pair and
 * runs test_write_image_1D() over a set of image widths.
 *
 * The kernel source is produced by formatting either write1DKernelSourcePattern
 * (when gtestTypesToRun has kWriteTests set) or readwrite1DKernelSourcePattern
 * with the input type name, the write_image suffix ("i", "ui" or "f") and, when
 * gTestMipmaps is set, an extra "lod" kernel parameter / argument.
 *
 * Which widths are tested depends on the global mode flags, checked in this order:
 *   gTestSmallImages - every width from 1 to 12
 *   gTestMaxImages   - the widths reported by get_max_sizes()
 *   gTestRounding    - a single width derived from the channel type size
 *   otherwise        - NUM_IMAGE_ITERATIONS randomly chosen widths
 *
 * device    - device whose limits are queried
 * format    - image format under test
 * inputType - kInt, kUInt, or anything else (treated as float)
 * d         - random number generator state
 *
 * Returns 0 when every width passes; otherwise the first non-zero value
 * returned by test_write_image_1D(). Failed OpenCL calls are handed to the
 * test_error macro (defined elsewhere).
 */
int test_write_image_1D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
{
    char programSrc[10240];
    const char *ptr;
    const char *readFormat;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *KernelSourcePattern = NULL;
    int error;

    // Get our operating parameters
    size_t maxWidth;
    cl_ulong maxAllocSize, memSize;
    size_t pixelSize;

    image_descriptor imageInfo = { 0x0 };

    imageInfo.format = format;
    imageInfo.slicePitch = imageInfo.arraySize = 0;
    imageInfo.height = imageInfo.depth = 1;
    imageInfo.type = CL_MEM_OBJECT_IMAGE1D;
    pixelSize = get_pixel_size( imageInfo.format );

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );

    // Clamp the reported global memory size to what a size_t can express
    if( memSize > (cl_ulong)SIZE_MAX )
    {
        memSize = (cl_ulong)SIZE_MAX;
    }

    // Determine types
    if( inputType == kInt )
        readFormat = "i";
    else if( inputType == kUInt )
        readFormat = "ui";
    else // kFloat
        readFormat = "f";

    // Construct the source
    if( gtestTypesToRun & kWriteTests )
    {
        KernelSourcePattern = write1DKernelSourcePattern;
    }
    else
    {
        KernelSourcePattern = readwrite1DKernelSourcePattern;
    }

    sprintf( programSrc,
             KernelSourcePattern,
             get_explicit_type_name( inputType ),
             gTestMipmaps ? ", int lod" : "",
             readFormat,
             gTestMipmaps ? ", lod" : "" );

    ptr = programSrc;
    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
    test_error( error, "Unable to create testing kernel" );

    // Run tests
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            imageInfo.rowPitch = imageInfo.width * pixelSize;

            if( gTestMipmaps )
                imageInfo.num_mip_levels = (size_t)random_in_range( 2, ( compute_max_mip_levels( imageInfo.width, 0, 0 ) - 1 ), d );

            if( gDebugTrace )
                log_info( " at size %d\n", (int)imageInfo.width );

            int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
        size_t numberOfSizes;
        size_t sizes[100][3];

        get_max_sizes( &numberOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format, CL_TRUE );

        for( size_t idx = 0; idx < numberOfSizes; idx++ )
        {
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.rowPitch = imageInfo.width * pixelSize;

            if( gTestMipmaps )
                imageInfo.num_mip_levels = (size_t)random_in_range( 2, ( compute_max_mip_levels( imageInfo.width, 0, 0 ) - 1 ), d );

            log_info( "Testing %d\n", (int)imageInfo.width );

            int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestRounding )
    {
        // Number of distinct values of one channel (2 ^ (type size * 8)).
        // The shift is done in a 64-bit type: a type size of 4 yields a shift count
        // of 32, which is undefined behaviour when the left operand is a plain int.
        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
        imageInfo.width = (size_t)( typeRange / 256 );

        imageInfo.rowPitch = imageInfo.width * pixelSize;

        int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
        if( retCode )
            return retCode;
    }
    else
    {
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;

            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );

                if( gTestMipmaps )
                {
                    imageInfo.num_mip_levels = (size_t)random_in_range( 2, ( compute_max_mip_levels( imageInfo.width, 0, 0 ) - 1 ), d );
                    size = (cl_ulong)compute_mipmapped_image_size( imageInfo ) * 4;
                }
                else
                {
                    imageInfo.rowPitch = imageInfo.width * pixelSize;

                    if( gEnablePitch )
                    {
                        // Pad the row with a random number of extra pixels
                        size_t extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.rowPitch += extraWidth * pixelSize;
                    }

                    size = (size_t)imageInfo.rowPitch * 4;
                }
            } while( size > maxAllocSize || ( size * 3 ) > memSize );

            if( gDebugTrace )
            {
                log_info( " at size %d (pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth );
                if( gTestMipmaps )
                    log_info( " and %d mip levels\n", (int)imageInfo.num_mip_levels );
            }

            int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }

    return 0;
}
|
||||
@@ -0,0 +1,723 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../testBase.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#define MAX_ERR 0.005f
|
||||
|
||||
extern cl_command_queue queue;
|
||||
extern cl_context context;
|
||||
extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps;
|
||||
extern cl_filter_mode gFilterModeToSkip;
|
||||
extern cl_mem_flags gMemFlagsToUse;
|
||||
extern int gtestTypesToRun;
|
||||
|
||||
// Kernel body fragment computing the index into the input buffer for a
// non-mipmapped 1D image array: one row of get_image_width(output) texels
// per value of tidY.
const char *offset1DArraySource =
    " int offset = tidY*get_image_width(output) + tidX;\n";

// Same index computation for a given mip level: the row width is the image
// width shifted right by "lod", with a minimum of 1.
const char *offset1DArrayLodSource =
    " int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
    " int offset = tidY*width_lod + tidX;\n";

// printf-style template for a kernel that writes to a read_write
// image1d_array_t. It has five %s slots, in order: the element type of the
// input vector ("%s4"), extra kernel parameter text, a body line that must
// define "offset", the write_image suffix, and extra text after the
// coordinate argument.
// NOTE(review): the formatting call is not in this part of the file; the body
// slot is presumably filled with offset1DArraySource or
// offset1DArrayLodSource - confirm against the caller.
const char *readwrite1DArrayKernelSourcePattern =
    "__kernel void sample_kernel( __global %s4 *input, "
    "read_write image1d_array_t output %s)\n"
    "{\n"
    " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
    "%s"
    " write_image%s( output, (int2)( tidX, tidY )%s, "
    "input[ offset ]);\n"
    "}";

// Template with the same five %s slots as above, but the image is declared
// write_only (and there is a space before the slot that follows the
// coordinate argument).
const char *write1DArrayKernelSourcePattern =
    "__kernel void sample_kernel( __global %s4 *input, "
    "write_only image1d_array_t output %s)\n"
    "{\n"
    " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
    "%s"
    " write_image%s( output, (int2)( tidX, tidY ) %s, "
    "input[ offset ]);\n"
    "}";
|
||||
|
||||
int test_write_image_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
|
||||
image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
|
||||
{
|
||||
int totalErrors = 0;
|
||||
size_t num_flags = 0;
|
||||
const cl_mem_flags *mem_flag_types = NULL;
|
||||
const char * *mem_flag_names = NULL;
|
||||
const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
|
||||
const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
|
||||
const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
|
||||
const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
|
||||
|
||||
if(gtestTypesToRun & kWriteTests)
|
||||
{
|
||||
mem_flag_types = write_only_mem_flag_types;
|
||||
mem_flag_names = write_only_mem_flag_names;
|
||||
num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
|
||||
}
|
||||
else
|
||||
{
|
||||
mem_flag_types = read_write_mem_flag_types;
|
||||
mem_flag_names = read_write_mem_flag_names;
|
||||
num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
|
||||
}
|
||||
|
||||
size_t pixelSize = get_pixel_size( imageInfo->format );
|
||||
|
||||
for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
|
||||
{
|
||||
int error;
|
||||
size_t threads[2];
|
||||
bool verifyRounding = false;
|
||||
int totalErrors = 0;
|
||||
int forceCorrectlyRoundedWrites = 0;
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
// Require Apple's CPU implementation to be correctly rounded, not just within 0.6
|
||||
cl_device_type type = 0;
|
||||
if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
|
||||
{
|
||||
log_error("Error: Could not get device type for Apple device! (%d) \n", error );
|
||||
return 1;
|
||||
}
|
||||
if( type == CL_DEVICE_TYPE_CPU )
|
||||
forceCorrectlyRoundedWrites = 1;
|
||||
#endif
|
||||
|
||||
if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
|
||||
if( DetectFloatToHalfRoundingMode(queue) )
|
||||
return 1;
|
||||
|
||||
BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
|
||||
|
||||
create_random_image_data( inputType, imageInfo, imageValues, d );
|
||||
|
||||
if(!gTestMipmaps)
|
||||
{
|
||||
if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
|
||||
{
|
||||
/* Pilot data for sRGB images */
|
||||
if(is_sRGBA_order(imageInfo->format->image_channel_order))
|
||||
{
|
||||
// We want to generate ints (mostly) in range of the target format which should be [0,255]
|
||||
// However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
|
||||
// it can test some out-of-range data points
|
||||
const unsigned int test_range_ext = 16;
|
||||
int formatMin = 0 - test_range_ext;
|
||||
int formatMax = 255 + test_range_ext;
|
||||
int pixel_value = 0;
|
||||
|
||||
// First, fill with arbitrary floats
|
||||
for( size_t y = 0; y < imageInfo->arraySize; y++ )
|
||||
{
|
||||
float *inputValues = (float *)(char*)imageValues + y * imageInfo->width * 4;
|
||||
for( size_t i = 0; i < imageInfo->width * 4; i++ )
|
||||
{
|
||||
pixel_value = random_in_range( formatMin, (int)formatMax, d );
|
||||
inputValues[ i ] = (float)(pixel_value/255.0f);
|
||||
}
|
||||
}
|
||||
|
||||
// Throw a few extra test values in there
|
||||
float *inputValues = (float *)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
|
||||
// Piloting some debug inputs.
|
||||
inputValues[ i++ ] = -0.5f;
|
||||
inputValues[ i++ ] = 0.5f;
|
||||
inputValues[ i++ ] = 2.f;
|
||||
inputValues[ i++ ] = 0.5f;
|
||||
|
||||
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
|
||||
// is correct
|
||||
if( imageInfo->width > 12 )
|
||||
{
|
||||
float formatMax = (float)get_format_max_int( imageInfo->format );
|
||||
inputValues[ i++ ] = 4.0f / formatMax;
|
||||
inputValues[ i++ ] = 4.3f / formatMax;
|
||||
inputValues[ i++ ] = 4.5f / formatMax;
|
||||
inputValues[ i++ ] = 4.7f / formatMax;
|
||||
inputValues[ i++ ] = 5.0f / formatMax;
|
||||
inputValues[ i++ ] = 5.3f / formatMax;
|
||||
inputValues[ i++ ] = 5.5f / formatMax;
|
||||
inputValues[ i++ ] = 5.7f / formatMax;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// First, fill with arbitrary floats
|
||||
for( size_t y = 0; y < imageInfo->arraySize; y++ )
|
||||
{
|
||||
float *inputValues = (float *)(char*)imageValues + y * imageInfo->width * 4;
|
||||
for( size_t i = 0; i < imageInfo->width * 4; i++ )
|
||||
inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
|
||||
}
|
||||
|
||||
// Throw a few extra test values in there
|
||||
float *inputValues = (float *)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
inputValues[ i++ ] = -0.0000000000009f;
|
||||
inputValues[ i++ ] = 1.f;
|
||||
inputValues[ i++ ] = -1.f;
|
||||
inputValues[ i++ ] = 2.f;
|
||||
|
||||
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
|
||||
// is correct
|
||||
if( imageInfo->width > 12 )
|
||||
{
|
||||
float formatMax = (float)get_format_max_int( imageInfo->format );
|
||||
inputValues[ i++ ] = 4.0f / formatMax;
|
||||
inputValues[ i++ ] = 4.3f / formatMax;
|
||||
inputValues[ i++ ] = 4.5f / formatMax;
|
||||
inputValues[ i++ ] = 4.7f / formatMax;
|
||||
inputValues[ i++ ] = 5.0f / formatMax;
|
||||
inputValues[ i++ ] = 5.3f / formatMax;
|
||||
inputValues[ i++ ] = 5.5f / formatMax;
|
||||
inputValues[ i++ ] = 5.7f / formatMax;
|
||||
verifyRounding = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( inputType == kUInt )
|
||||
{
|
||||
unsigned int *inputValues = (unsigned int*)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
inputValues[ i++ ] = 0;
|
||||
inputValues[ i++ ] = 65535;
|
||||
inputValues[ i++ ] = 7271820;
|
||||
inputValues[ i++ ] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Construct testing sources
|
||||
clProtectedImage protImage;
|
||||
clMemWrapper unprotImage;
|
||||
cl_mem image;
|
||||
|
||||
if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
|
||||
{
|
||||
// clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
|
||||
// Do not use protected images for max image size test since it rounds the row size to a page size
|
||||
if (gTestMaxImages) {
|
||||
create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
|
||||
|
||||
unprotImage = create_image_1d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
|
||||
imageInfo->width, imageInfo->arraySize, 0, 0,
|
||||
maxImageUseHostPtrBackingStore, &error );
|
||||
} else {
|
||||
error = protImage.Create( context, (cl_mem_object_type)CL_MEM_OBJECT_IMAGE1D_ARRAY, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, 1, 1, imageInfo->arraySize );
|
||||
}
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create 1D image array of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->arraySize,
|
||||
imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
|
||||
if (gTestMaxImages)
|
||||
image = (cl_mem)unprotImage;
|
||||
else
|
||||
image = (cl_mem)protImage;
|
||||
}
|
||||
else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
|
||||
{
|
||||
// Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
|
||||
// it works just as if no flag is specified, so we just do the same thing either way
|
||||
// Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
|
||||
if( gTestMipmaps )
|
||||
{
|
||||
cl_image_desc image_desc = {0};
|
||||
image_desc.image_type = imageInfo->type;
|
||||
image_desc.num_mip_levels = imageInfo->num_mip_levels;
|
||||
image_desc.image_width = imageInfo->width;
|
||||
image_desc.image_array_size = imageInfo->arraySize;
|
||||
|
||||
unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
|
||||
imageInfo->format, &image_desc, NULL, &error);
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create %d level 1D image array of size %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->arraySize,
|
||||
IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unprotImage = create_image_1d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
|
||||
imageInfo->width, imageInfo->arraySize, 0, 0,
|
||||
imageValues, &error );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create 1D image array of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->arraySize,
|
||||
imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
image = unprotImage;
|
||||
}
|
||||
|
||||
error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
size_t width_lod = imageInfo->width, nextLevelOffset = 0;
|
||||
size_t origin[ 3 ] = { 0, 0, 0 };
|
||||
size_t region[ 3 ] = { imageInfo->width, imageInfo->arraySize, 1 };
|
||||
size_t resultSize;
|
||||
|
||||
for( int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
|
||||
{
|
||||
if(gTestMipmaps)
|
||||
{
|
||||
error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
|
||||
|
||||
}
|
||||
// Run the kernel
|
||||
threads[0] = (size_t)width_lod;
|
||||
threads[1] = (size_t)imageInfo->arraySize;
|
||||
|
||||
clMemWrapper inputStream;
|
||||
|
||||
char *imagePtrOffset = imageValues + nextLevelOffset;
|
||||
inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
|
||||
get_explicit_type_size( inputType ) * 4 * width_lod * imageInfo->arraySize, imagePtrOffset, &error );
|
||||
test_error( error, "Unable to create input buffer" );
|
||||
|
||||
// Set arguments
|
||||
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "Unable to run kernel" );
|
||||
|
||||
// Get results
|
||||
if( gTestMipmaps )
|
||||
resultSize = width_lod * get_pixel_size(imageInfo->format) * imageInfo->arraySize;
|
||||
else
|
||||
resultSize = imageInfo->rowPitch * imageInfo->arraySize;
|
||||
|
||||
clProtectedArray PA(resultSize);
|
||||
char *resultValues = (char *)((void *)PA);
|
||||
|
||||
if( gDebugTrace )
|
||||
log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
|
||||
|
||||
|
||||
origin[2] = lod;
|
||||
region[0] = width_lod;
|
||||
error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region,
|
||||
gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results from kernel" );
|
||||
if( gDebugTrace )
|
||||
log_info( " results read\n" );
|
||||
|
||||
// Validate results element by element
|
||||
char *imagePtr = imageValues + nextLevelOffset;
|
||||
int numTries = 5;
|
||||
for( size_t y = 0, i = 0; y < imageInfo->arraySize; y++ )
|
||||
{
|
||||
char *resultPtr;
|
||||
if( gTestMipmaps )
|
||||
resultPtr = (char *)resultValues + y * width_lod * pixelSize;
|
||||
else
|
||||
resultPtr = (char*)resultValues + y * imageInfo->rowPitch;
|
||||
for( size_t x = 0; x < width_lod; x++, i++ )
|
||||
{
|
||||
char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
|
||||
|
||||
// Convert this pixel
|
||||
if( inputType == kFloat )
|
||||
pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
|
||||
else if( inputType == kInt )
|
||||
pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
|
||||
else // if( inputType == kUInt )
|
||||
pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
|
||||
|
||||
// Compare against the results
|
||||
if(is_sRGBA_order(imageInfo->format->image_channel_order))
|
||||
{
|
||||
// Compare sRGB-mapped values
|
||||
cl_float expected[4] = {0};
|
||||
cl_float* input_values = (float*)imagePtr;
|
||||
cl_uchar *actual = (cl_uchar*)resultPtr;
|
||||
float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
|
||||
float err[4] = {0.0f};
|
||||
|
||||
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
{
|
||||
if(j < 3)
|
||||
{
|
||||
expected[j] = sRGBmap(input_values[j]);
|
||||
}
|
||||
else // there is no sRGB conversion for alpha component if it exists
|
||||
{
|
||||
expected[j] = NORMALIZE(input_values[j], 255.0f);
|
||||
}
|
||||
|
||||
err[j] = fabsf( expected[ j ] - actual[ j ] );
|
||||
}
|
||||
|
||||
if ((err[0] > max_err) ||
|
||||
(err[1] > max_err) ||
|
||||
(err[2] > max_err) ||
|
||||
(err[3] > 0)) // there is no conversion for alpha so the error should be zero
|
||||
{
|
||||
log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
|
||||
log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
|
||||
log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
|
||||
log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
|
||||
{
|
||||
// Compare floats
|
||||
float *expected = (float *)resultBuffer;
|
||||
float *actual = (float *)resultPtr;
|
||||
float err = 0.f;
|
||||
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
|
||||
|
||||
err /= (float)get_format_channel_count( imageInfo->format );
|
||||
if( err > MAX_ERR )
|
||||
{
|
||||
unsigned int *e = (unsigned int *)expected;
|
||||
unsigned int *a = (unsigned int *)actual;
|
||||
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Error: %g\n", err );
|
||||
log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
|
||||
log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
|
||||
log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
|
||||
log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
|
||||
totalErrors++;
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
|
||||
{
|
||||
|
||||
// Compare half floats
|
||||
if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
|
||||
{
|
||||
cl_ushort *e = (cl_ushort *)resultBuffer;
|
||||
cl_ushort *a = (cl_ushort *)resultPtr;
|
||||
int err_cnt = 0;
|
||||
|
||||
//Fix up cases where we have NaNs
|
||||
for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
{
|
||||
if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
|
||||
continue;
|
||||
if( e[j] != a[j] )
|
||||
err_cnt++;
|
||||
}
|
||||
|
||||
if( err_cnt )
|
||||
{
|
||||
totalErrors++;
|
||||
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
|
||||
log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
|
||||
if( inputType == kFloat )
|
||||
{
|
||||
float *p = (float *)(char *)imagePtr;
|
||||
log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
}
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Exact result passes every time
|
||||
if( memcmp( resultBuffer, resultPtr, pixelSize ) != 0 )
|
||||
{
|
||||
// result is inexact. Calculate error
|
||||
int failure = 1;
|
||||
float errors[4] = {NAN, NAN, NAN, NAN};
|
||||
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
|
||||
|
||||
// We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
|
||||
if( 0 == forceCorrectlyRoundedWrites &&
|
||||
(
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
|
||||
imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
|
||||
imageInfo->format->image_channel_data_type == CL_SNORM_INT16
|
||||
))
|
||||
{
|
||||
if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
|
||||
! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
|
||||
failure = 0;
|
||||
}
|
||||
|
||||
|
||||
if( failure )
|
||||
{
|
||||
totalErrors++;
|
||||
// Is it our special rounding test?
|
||||
if( verifyRounding && i >= 1 && i <= 2 )
|
||||
{
|
||||
// Try to guess what the rounding mode of the device really is based on what it returned
|
||||
const char *deviceRounding = "unknown";
|
||||
unsigned int deviceResults[8];
|
||||
read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
|
||||
read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
|
||||
|
||||
if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
|
||||
deviceRounding = "truncate";
|
||||
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
|
||||
deviceRounding = "round to nearest";
|
||||
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
|
||||
deviceRounding = "round to even";
|
||||
|
||||
log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
|
||||
deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
|
||||
log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
|
||||
return 1;
|
||||
}
|
||||
log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
|
||||
switch(imageInfo->format->image_channel_data_type)
|
||||
{
|
||||
case CL_UNORM_INT8:
|
||||
case CL_SNORM_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
|
||||
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_UNORM_INT16:
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_SIGNED_INT16:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
|
||||
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_HALF_FLOAT:
|
||||
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
|
||||
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_SIGNED_INT32:
|
||||
log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
|
||||
break;
|
||||
case CL_FLOAT:
|
||||
log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
|
||||
log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
|
||||
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
}
|
||||
|
||||
float *v = (float *)(char *)imagePtr;
|
||||
log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
imagePtr += get_explicit_type_size( inputType ) * 4;
|
||||
resultPtr += pixelSize;
|
||||
}
|
||||
}
|
||||
{
|
||||
nextLevelOffset += width_lod * imageInfo->arraySize * get_pixel_size(imageInfo->format);
|
||||
width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// All done!
|
||||
return totalErrors;
|
||||
}
|
||||
|
||||
|
||||
// Driver for the 1D image array write tests for a single image format.
//
// Builds the "sample_kernel" program from the write-only or read-write 1D
// array kernel template (selected by gtestTypesToRun & kWriteTests), then
// calls test_write_image_1D_array() for a series of image sizes. The sizes are
// chosen by the global mode flags, checked in this order: gTestSmallImages,
// gTestMaxImages, gTestRounding, otherwise NUM_IMAGE_ITERATIONS random sizes.
//
//   device    - device whose image and memory limits are queried
//   format    - image format under test
//   inputType - element type of the kernel input buffer (kInt, kUInt, else float)
//   d         - random number generator state
//
// Returns 0 when every size passes, otherwise the first non-zero value
// returned by test_write_image_1D_array(). Setup failures are reported
// through test_error().
int test_write_image_1D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
{
    char programSrc[10240];
    const char *ptr;
    const char *readFormat;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *KernelSourcePattern = NULL;
    int error;

    // Get our operating parameters
    size_t maxWidth, maxArraySize;
    cl_ulong maxAllocSize, memSize;
    size_t pixelSize;

    image_descriptor imageInfo = { 0x0 };

    imageInfo.format = format;
    imageInfo.slicePitch = 0;
    imageInfo.height = imageInfo.depth = 1;
    imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
    pixelSize = get_pixel_size( imageInfo.format );

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );

    // Cap the reported global memory size at what a size_t can represent
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }

    // Determine types: suffix appended to "write_image" in the kernel source
    if( inputType == kInt )
        readFormat = "i";
    else if( inputType == kUInt )
        readFormat = "ui";
    else // kFloat
        readFormat = "f";

    if(gtestTypesToRun & kWriteTests)
    {
        KernelSourcePattern = write1DArrayKernelSourcePattern;
    }
    else
    {
        KernelSourcePattern = readwrite1DArrayKernelSourcePattern;
    }

    // Construct the source
    sprintf( programSrc,
             KernelSourcePattern,
             get_explicit_type_name( inputType ),
             gTestMipmaps ? ", int lod" : "",
             gTestMipmaps ? offset1DArrayLodSource : offset1DArraySource,
             readFormat,
             gTestMipmaps ? ", lod" :"" );

    ptr = programSrc;
    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
    test_error( error, "Unable to create testing kernel" );

    // Run tests
    if( gTestSmallImages )
    {
        // Exhaustively walk widths 1..12 and array sizes 2..8
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            imageInfo.rowPitch = imageInfo.width * pixelSize;
            imageInfo.slicePitch = imageInfo.rowPitch;
            for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ )
            {
                if(gTestMipmaps)
                    imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);

                if( gDebugTrace )
                    log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize );
                int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
                if( retCode )
                    return retCode;
            }
        }
    }
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
        size_t numberOfSizes;
        size_t sizes[100][3];

        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format, CL_TRUE);

        for( size_t idx = 0; idx < numberOfSizes; idx++ )
        {
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.arraySize = sizes[ idx ][ 2 ];
            imageInfo.rowPitch = imageInfo.width * pixelSize;
            imageInfo.slicePitch = imageInfo.rowPitch;
            if(gTestMipmaps)
                imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);
            log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.arraySize);
            int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestRounding )
    {
        // Number of distinct values of one channel. The shift is done in
        // cl_ulong because a 4-byte channel type needs 1 << 32, which is
        // undefined behaviour when the left operand is a plain int.
        const cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
        imageInfo.arraySize = (size_t)( typeRange / 256 );
        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.arraySize );

        imageInfo.rowPitch = imageInfo.width * pixelSize;
        imageInfo.slicePitch = imageInfo.rowPitch;
        int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
        if( retCode )
            return retCode;
    }
    else
    {
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
                imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, d );

                if( gTestMipmaps)
                {
                    imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d);
                    size = (cl_ulong) compute_mipmapped_image_size(imageInfo) * 4;
                }
                else
                {
                    imageInfo.rowPitch = imageInfo.width * pixelSize;
                    if( gEnablePitch )
                    {
                        // Pad each row by a random number of extra pixels
                        size_t extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.rowPitch += extraWidth * pixelSize;
                    }
                    imageInfo.slicePitch = imageInfo.rowPitch;

                    size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.arraySize * 4;
                }
            } while( size > maxAllocSize || ( size * 3 ) > memSize );

            if( gDebugTrace )
                log_info( " at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize );

            int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }

    return 0;
}
|
||||
@@ -0,0 +1,771 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../testBase.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#define MAX_ERR 0.005f
|
||||
|
||||
extern cl_command_queue queue;
|
||||
extern cl_context context;
|
||||
extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps;
|
||||
extern cl_filter_mode gFilterModeToSkip;
|
||||
extern cl_mem_flags gMemFlagsToUse;
|
||||
|
||||
extern int gtestTypesToRun;
|
||||
|
||||
extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z,
|
||||
ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
|
||||
|
||||
// Utility function to clamp down image sizes for certain tests to avoid
// using too much memory.
//
// Takes 1/32 of maxDimSize and limits the result to the range [16, 128].
static size_t reduceImageSizeRange(size_t maxDimSize) {
    const size_t lowerBound = 16;
    const size_t upperBound = 128;
    const size_t scaled = maxDimSize / 32;

    if (scaled > upperBound)
        return upperBound;

    return (scaled < lowerBound) ? lowerBound : scaled;
}
|
||||
|
||||
// Scale a maximum depth down for testing: takes 1/32 of maxDepth and limits
// the result to the range [8, 32].
static size_t reduceImageDepth(size_t maxDepth) {
    const size_t lowerBound = 8;
    const size_t upperBound = 32;
    const size_t scaled = maxDepth / 32;

    if (scaled > upperBound)
        return upperBound;

    return (scaled < lowerBound) ? lowerBound : scaled;
}
|
||||
|
||||
// printf-style template for the kernel that writes to a write_only image.
// Each work-item takes its (x, y, z) global id, computes `offset` with the
// snippet substituted for the lone "%s" line, and writes input[ offset ] to
// the image at that coordinate. The remaining %s slots are filled in by the
// code that assembles the program source.
const char *write2DArrayKernelSourcePattern =
    "__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n{\n"
    " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
    "%s"
    " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n}";

// Same template, but with the image declared read_write instead of write_only.
const char *readwrite2DArrayKernelSourcePattern =
    "__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n{\n"
    " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
    "%s"
    " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ] );\n}";

// Offset snippet without a lod: linear index of (tidX, tidY, tidZ) using the
// image's own width and height.
const char *offset2DArrayKernelSource =
    " int offset = tidZ*get_image_width(output)*get_image_height(output)"
    " + tidY*get_image_width(output) + tidX;\n";

// Offset snippet with a lod: width and height are shifted right by `lod`
// (never going below 1) before the linear index is computed.
const char *offset2DArrayLodKernelSource =
    " int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
    " int height_lod = ( get_image_height(output) >> lod ) ? ( get_image_height(output) >> lod ) : 1;\n"
    " int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n";
|
||||
|
||||
int test_write_image_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
|
||||
image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
|
||||
{
|
||||
int totalErrors = 0;
|
||||
|
||||
size_t num_flags = 0;
|
||||
const cl_mem_flags *mem_flag_types = NULL;
|
||||
const char * *mem_flag_names = NULL;
|
||||
const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
|
||||
const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
|
||||
const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
|
||||
const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
|
||||
if(gtestTypesToRun & kWriteTests)
|
||||
{
|
||||
mem_flag_types = write_only_mem_flag_types;
|
||||
mem_flag_names = write_only_mem_flag_names;
|
||||
num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
|
||||
}
|
||||
else
|
||||
{
|
||||
mem_flag_types = read_write_mem_flag_types;
|
||||
mem_flag_names = read_write_mem_flag_names;
|
||||
num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
|
||||
}
|
||||
|
||||
size_t pixelSize = get_pixel_size( imageInfo->format );
|
||||
|
||||
for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
|
||||
{
|
||||
int error;
|
||||
size_t threads[3];
|
||||
bool verifyRounding = false;
|
||||
int totalErrors = 0;
|
||||
int forceCorrectlyRoundedWrites = 0;
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
// Require Apple's CPU implementation to be correctly rounded, not just within 0.6
|
||||
cl_device_type type = 0;
|
||||
if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
|
||||
{
|
||||
log_error("Error: Could not get device type for Apple device! (%d) \n", error );
|
||||
return 1;
|
||||
}
|
||||
if( type == CL_DEVICE_TYPE_CPU )
|
||||
forceCorrectlyRoundedWrites = 1;
|
||||
#endif
|
||||
|
||||
if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
|
||||
if( DetectFloatToHalfRoundingMode(queue) )
|
||||
return 1;
|
||||
|
||||
BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
|
||||
|
||||
create_random_image_data( inputType, imageInfo, imageValues, d );
|
||||
|
||||
if(!gTestMipmaps)
|
||||
{
|
||||
if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
|
||||
{
|
||||
/* Pilot data for sRGB images */
|
||||
if(is_sRGBA_order(imageInfo->format->image_channel_order))
|
||||
{
|
||||
// We want to generate ints (mostly) in range of the target format which should be [0,255]
|
||||
// However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
|
||||
// it can test some out-of-range data points
|
||||
const unsigned int test_range_ext = 16;
|
||||
int formatMin = 0 - test_range_ext;
|
||||
int formatMax = 255 + test_range_ext;
|
||||
int pixel_value = 0;
|
||||
|
||||
// First, fill with arbitrary floats
|
||||
for( size_t z = 0; z < imageInfo->arraySize; z++ )
|
||||
{
|
||||
for( size_t y = 0; y < imageInfo->height; y++ )
|
||||
{
|
||||
float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
|
||||
for( size_t i = 0; i < imageInfo->width * 4; i++ )
|
||||
{
|
||||
pixel_value = random_in_range( formatMin, (int)formatMax, d );
|
||||
inputValues[ i ] = (float)(pixel_value/255.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Throw a few extra test values in there
|
||||
float *inputValues = (float *)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
// Piloting some debug inputs.
|
||||
inputValues[ i++ ] = -0.5f;
|
||||
inputValues[ i++ ] = 0.5f;
|
||||
inputValues[ i++ ] = 2.f;
|
||||
inputValues[ i++ ] = 0.5f;
|
||||
|
||||
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
|
||||
// is correct
|
||||
if( imageInfo->width > 12 )
|
||||
{
|
||||
float formatMax = (float)get_format_max_int( imageInfo->format );
|
||||
inputValues[ i++ ] = 4.0f / formatMax;
|
||||
inputValues[ i++ ] = 4.3f / formatMax;
|
||||
inputValues[ i++ ] = 4.5f / formatMax;
|
||||
inputValues[ i++ ] = 4.7f / formatMax;
|
||||
inputValues[ i++ ] = 5.0f / formatMax;
|
||||
inputValues[ i++ ] = 5.3f / formatMax;
|
||||
inputValues[ i++ ] = 5.5f / formatMax;
|
||||
inputValues[ i++ ] = 5.7f / formatMax;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// First, fill with arbitrary floats
|
||||
for( size_t z = 0; z < imageInfo->arraySize; z++ )
|
||||
{
|
||||
for( size_t y = 0; y < imageInfo->height; y++ )
|
||||
{
|
||||
float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
|
||||
for( size_t i = 0; i < imageInfo->width * 4; i++ )
|
||||
inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
|
||||
}
|
||||
}
|
||||
|
||||
// Throw a few extra test values in there
|
||||
float *inputValues = (float *)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
inputValues[ i++ ] = -0.0000000000009f;
|
||||
inputValues[ i++ ] = 1.f;
|
||||
inputValues[ i++ ] = -1.f;
|
||||
inputValues[ i++ ] = 2.f;
|
||||
|
||||
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
|
||||
// is correct
|
||||
if( imageInfo->width > 12 )
|
||||
{
|
||||
float formatMax = (float)get_format_max_int( imageInfo->format );
|
||||
inputValues[ i++ ] = 4.0f / formatMax;
|
||||
inputValues[ i++ ] = 4.3f / formatMax;
|
||||
inputValues[ i++ ] = 4.5f / formatMax;
|
||||
inputValues[ i++ ] = 4.7f / formatMax;
|
||||
inputValues[ i++ ] = 5.0f / formatMax;
|
||||
inputValues[ i++ ] = 5.3f / formatMax;
|
||||
inputValues[ i++ ] = 5.5f / formatMax;
|
||||
inputValues[ i++ ] = 5.7f / formatMax;
|
||||
verifyRounding = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( inputType == kUInt )
|
||||
{
|
||||
unsigned int *inputValues = (unsigned int*)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
inputValues[ i++ ] = 0;
|
||||
inputValues[ i++ ] = 65535;
|
||||
inputValues[ i++ ] = 7271820;
|
||||
inputValues[ i++ ] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Construct testing sources
|
||||
clProtectedImage protImage;
|
||||
clMemWrapper unprotImage;
|
||||
cl_mem image;
|
||||
|
||||
if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
|
||||
{
|
||||
create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
|
||||
|
||||
unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
|
||||
imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0,
|
||||
maxImageUseHostPtrBackingStore, &error );
|
||||
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) );
|
||||
return error;
|
||||
}
|
||||
|
||||
image = (cl_mem)unprotImage;
|
||||
}
|
||||
else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
|
||||
{
|
||||
// Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
|
||||
// it works just as if no flag is specified, so we just do the same thing either way
|
||||
// Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
|
||||
if( gTestMipmaps )
|
||||
{
|
||||
cl_image_desc image_desc = {0};
|
||||
image_desc.image_type = imageInfo->type;
|
||||
image_desc.num_mip_levels = imageInfo->num_mip_levels;
|
||||
image_desc.image_width = imageInfo->width;
|
||||
image_desc.image_height = imageInfo->height;
|
||||
image_desc.image_array_size = imageInfo->arraySize;
|
||||
|
||||
unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
|
||||
imageInfo->format, &image_desc, NULL, &error);
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create %d level 2D image array of size %ld x %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, imageInfo->arraySize,
|
||||
IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
|
||||
imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, imageValues, &error );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
image = unprotImage;
|
||||
}
|
||||
|
||||
error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
size_t width_lod = imageInfo->width, height_lod = imageInfo->height, nextLevelOffset = 0;
|
||||
size_t origin[ 4 ] = { 0, 0, 0, 0 };
|
||||
size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->arraySize };
|
||||
size_t resultSize;
|
||||
|
||||
int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
|
||||
for( int lod = 0; lod < num_lod_loops; lod++)
|
||||
{
|
||||
if(gTestMipmaps)
|
||||
{
|
||||
error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
|
||||
}
|
||||
// Run the kernel
|
||||
threads[0] = (size_t)width_lod;
|
||||
threads[1] = (size_t)height_lod;
|
||||
threads[2] = (size_t)imageInfo->arraySize;
|
||||
|
||||
clMemWrapper inputStream;
|
||||
|
||||
char *imagePtrOffset = imageValues + nextLevelOffset;
|
||||
inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
|
||||
get_explicit_type_size( inputType ) * 4 * width_lod * height_lod * imageInfo->arraySize, imagePtrOffset, &error );
|
||||
test_error( error, "Unable to create input buffer" );
|
||||
|
||||
// Set arguments
|
||||
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "Unable to run kernel" );
|
||||
|
||||
// Get results
|
||||
if( gTestMipmaps )
|
||||
resultSize = width_lod * height_lod *imageInfo->arraySize * pixelSize;
|
||||
else
|
||||
resultSize = imageInfo->slicePitch *imageInfo->arraySize;
|
||||
clProtectedArray PA(resultSize);
|
||||
char *resultValues = (char *)((void *)PA);
|
||||
|
||||
if( gDebugTrace )
|
||||
log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
|
||||
|
||||
origin[3] = lod;
|
||||
region[0] = width_lod;
|
||||
region[1] = height_lod;
|
||||
|
||||
error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results from kernel" );
|
||||
if( gDebugTrace )
|
||||
log_info( " results read\n" );
|
||||
|
||||
// Validate results element by element
|
||||
char *imagePtr = imageValues + nextLevelOffset;
|
||||
int numTries = 5;
|
||||
for( size_t z = 0, i = 0; z < imageInfo->arraySize; z++ )
|
||||
{
|
||||
for( size_t y = 0; y < height_lod; y++ )
|
||||
{
|
||||
char *resultPtr;
|
||||
if( gTestMipmaps )
|
||||
resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize;
|
||||
else
|
||||
resultPtr = (char*)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;
|
||||
for( size_t x = 0; x < width_lod; x++, i++ )
|
||||
{
|
||||
char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
|
||||
|
||||
// Convert this pixel
|
||||
if( inputType == kFloat )
|
||||
pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
|
||||
else if( inputType == kInt )
|
||||
pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
|
||||
else // if( inputType == kUInt )
|
||||
pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
|
||||
|
||||
// Compare against the results
|
||||
if(is_sRGBA_order(imageInfo->format->image_channel_order))
|
||||
{
|
||||
// Compare sRGB-mapped values
|
||||
cl_float expected[4] = {0};
|
||||
cl_float* input_values = (float*)imagePtr;
|
||||
cl_uchar *actual = (cl_uchar*)resultPtr;
|
||||
float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
|
||||
float err[4] = {0.0f};
|
||||
|
||||
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
{
|
||||
if(j < 3)
|
||||
{
|
||||
expected[j] = sRGBmap(input_values[j]);
|
||||
}
|
||||
else // there is no sRGB conversion for alpha component if it exists
|
||||
{
|
||||
expected[j] = NORMALIZE(input_values[j], 255.0f);
|
||||
}
|
||||
|
||||
err[j] = fabsf( expected[ j ] - actual[ j ] );
|
||||
}
|
||||
|
||||
if ((err[0] > max_err) ||
|
||||
(err[1] > max_err) ||
|
||||
(err[2] > max_err) ||
|
||||
(err[3] > 0)) // there is no conversion for alpha so the error should be zero
|
||||
{
|
||||
log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
|
||||
log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
|
||||
log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
|
||||
log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
|
||||
{
|
||||
// Compare floats
|
||||
float *expected = (float *)resultBuffer;
|
||||
float *actual = (float *)resultPtr;
|
||||
float err = 0.f;
|
||||
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
|
||||
|
||||
err /= (float)get_format_channel_count( imageInfo->format );
|
||||
if( err > MAX_ERR )
|
||||
{
|
||||
unsigned int *e = (unsigned int *)expected;
|
||||
unsigned int *a = (unsigned int *)actual;
|
||||
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Error: %g\n", err );
|
||||
log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
|
||||
log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
|
||||
log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
|
||||
log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
|
||||
totalErrors++;
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
|
||||
{
|
||||
// Compare half floats
|
||||
if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
|
||||
{
|
||||
cl_ushort *e = (cl_ushort *)resultBuffer;
|
||||
cl_ushort *a = (cl_ushort *)resultPtr;
|
||||
int err_cnt = 0;
|
||||
|
||||
//Fix up cases where we have NaNs
|
||||
for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
{
|
||||
if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
|
||||
continue;
|
||||
if( e[j] != a[j] )
|
||||
err_cnt++;
|
||||
}
|
||||
|
||||
if( err_cnt )
|
||||
{
|
||||
totalErrors++;
|
||||
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
|
||||
unsigned short *e = (unsigned short *)resultBuffer;
|
||||
unsigned short *a = (unsigned short *)resultPtr;
|
||||
log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
|
||||
log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
|
||||
if( inputType == kFloat )
|
||||
{
|
||||
float *p = (float *)(char *)imagePtr;
|
||||
log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
}
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Exact result passes every time
|
||||
if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
|
||||
{
|
||||
// result is inexact. Calculate error
|
||||
int failure = 1;
|
||||
float errors[4] = {NAN, NAN, NAN, NAN};
|
||||
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
|
||||
|
||||
// We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
|
||||
if( 0 == forceCorrectlyRoundedWrites &&
|
||||
(
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
|
||||
imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
|
||||
imageInfo->format->image_channel_data_type == CL_SNORM_INT16
|
||||
))
|
||||
{
|
||||
if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
|
||||
! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
|
||||
failure = 0;
|
||||
}
|
||||
|
||||
|
||||
if( failure )
|
||||
{
|
||||
totalErrors++;
|
||||
// Is it our special rounding test?
|
||||
if( verifyRounding && i >= 1 && i <= 2 )
|
||||
{
|
||||
// Try to guess what the rounding mode of the device really is based on what it returned
|
||||
const char *deviceRounding = "unknown";
|
||||
unsigned int deviceResults[8];
|
||||
read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod);
|
||||
read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
|
||||
|
||||
if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
|
||||
deviceRounding = "truncate";
|
||||
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
|
||||
deviceRounding = "round to nearest";
|
||||
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
|
||||
deviceRounding = "round to even";
|
||||
|
||||
log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
|
||||
deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
|
||||
log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
|
||||
return 1;
|
||||
}
|
||||
log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
|
||||
switch(imageInfo->format->image_channel_data_type)
|
||||
{
|
||||
case CL_UNORM_INT8:
|
||||
case CL_SNORM_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
|
||||
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_UNORM_INT16:
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_SIGNED_INT16:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
|
||||
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_HALF_FLOAT:
|
||||
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
|
||||
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_SIGNED_INT32:
|
||||
log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
|
||||
break;
|
||||
case CL_FLOAT:
|
||||
log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
|
||||
log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
|
||||
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
}
|
||||
|
||||
float *v = (float *)(char *)imagePtr;
|
||||
log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
imagePtr += get_explicit_type_size( inputType ) * (( imageInfo->format->image_channel_order == CL_DEPTH ) ? 1 : 4);
|
||||
resultPtr += get_pixel_size( imageInfo->format );
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
nextLevelOffset += width_lod*height_lod*imageInfo->arraySize*pixelSize;
|
||||
width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
|
||||
height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
// All done!
|
||||
return totalErrors;
|
||||
}
|
||||
|
||||
|
||||
// Builds the 2D-image-array write kernel for one image format / input type
// and runs test_write_image_2D_array() over a series of image sizes.
//
// The size series is selected by the global flags, checked in this order:
//   gTestSmallImages - every width 1..12, height 1..8, array size 2..6
//   gTestMaxImages   - the sizes reported by get_max_sizes()
//   gTestRounding    - a single image derived from the channel type size
//   otherwise        - NUM_IMAGE_ITERATIONS randomly chosen sizes
//
// Parameters:
//   device    - device whose image and memory limits are queried
//   format    - image format under test (stored in the image descriptor)
//   inputType - element type of the input buffer (kInt, kUInt, else float)
//   d         - random number generator state
//
// Returns 0 when every size passes; otherwise the first non-zero value
// produced by test_write_image_2D_array() (or by a failing test_error check).
int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
{
    char programSrc[10240];
    const char *ptr;
    const char *readFormat;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *KernelSourcePattern = NULL;
    int error;

    // Get our operating parameters
    size_t maxWidth, maxHeight, maxArraySize;
    cl_ulong maxAllocSize, memSize;

    image_descriptor imageInfo = { 0x0 };

    imageInfo.format = format;
    imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    imageInfo.depth = 1;
    imageInfo.slicePitch = 0;

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    // The queries above are the 2D / array limits, so report them as such.
    test_error( error, "Unable to get max image 2D array size from device" );

    // Never plan for more memory than the host can address.
    if( memSize > (cl_ulong)SIZE_MAX )
    {
        memSize = (cl_ulong)SIZE_MAX;
    }

    // Determine types: suffix appended to "write_image" in the kernel source
    if( inputType == kInt )
        readFormat = "i";
    else if( inputType == kUInt )
        readFormat = "ui";
    else // kFloat
        readFormat = "f";

    if( gtestTypesToRun & kWriteTests )
    {
        KernelSourcePattern = write2DArrayKernelSourcePattern;
    }
    else
    {
        KernelSourcePattern = readwrite2DArrayKernelSourcePattern;
    }

    // Construct the source
    // NOTE(review): sprintf is unbounded; the inserted strings look like short
    // fixed fragments, but confirm they can never exceed sizeof( programSrc ).
    sprintf( programSrc,
             KernelSourcePattern,
             get_explicit_type_name( inputType ),
             (format->image_channel_order == CL_DEPTH) ? "" : "4",
             (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t",
             gTestMipmaps ? " , int lod" : "",
             gTestMipmaps ? offset2DArrayLodKernelSource : offset2DArrayKernelSource,
             readFormat,
             gTestMipmaps ? ", lod" : "" );

    ptr = programSrc;
    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
    test_error( error, "Unable to create testing kernel" );

    // Run tests
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
            {
                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
                for( imageInfo.arraySize = 2; imageInfo.arraySize < 7; imageInfo.arraySize++ )
                {
                    if( gTestMipmaps )
                        imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);

                    if( gDebugTrace )
                        log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );

                    int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
                    if( retCode )
                        return retCode;
                }
            }
        }
    }
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
        size_t numberOfSizes;
        size_t sizes[100][3];

        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format, CL_TRUE);

        for( size_t idx = 0; idx < numberOfSizes; idx++ )
        {
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.height = sizes[ idx ][ 1 ];
            imageInfo.arraySize = sizes[ idx ][ 2 ];
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
            if( gTestMipmaps )
                imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);

            log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize);

            int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestRounding )
    {
        // Number of distinct values of one channel. Shift a 64-bit one: with a
        // plain int the shift is undefined for 4-byte channel types (shift
        // count equal to the width of int).
        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
        imageInfo.height = (size_t)( typeRange / 256 );
        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
        imageInfo.arraySize = 2;

        imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;

        int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
        if( retCode )
            return retCode;
    }
    else
    {
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
            int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
            int maxArraySizeRange = (int) reduceImageDepth(maxArraySize);

            cl_ulong size, buffSize;

            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
                imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, d );
                imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, d );
                imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, d );

                if( gTestMipmaps )
                {
                    imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1), d);
                    // Need to take into account the input buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc
                    size = 4 * compute_mipmapped_image_size(imageInfo);
                    buffSize = size * get_explicit_type_size( inputType );
                }
                else
                {
                    imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
                    imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
                    if( gEnablePitch )
                    {
                        // Pad each row by a random number of extra pixels ...
                        size_t extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );

                        // ... and each slice by a random number of extra rows.
                        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
                        extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
                    }

                    // Image size and buffer size may differ due to different pixel size.
                    // See the input buffer creation in test_write_image_2D_array.
                    size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4;
                    buffSize = (cl_ulong)imageInfo.width * (cl_ulong)imageInfo.height * imageInfo.arraySize * get_explicit_type_size(inputType) * 4;
                }
            } while( size > maxAllocSize || buffSize > maxAllocSize || ( size * 3 ) > memSize );

            if( gDebugTrace )
            {
                // Cast explicitly: size_t does not match %ld on every platform.
                log_info( " at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n",
                          (long)imageInfo.width, (long)imageInfo.height, (long)imageInfo.arraySize,
                          (long)imageInfo.rowPitch, (long)imageInfo.slicePitch,
                          (long)maxWidth, (long)maxHeight, (long)maxArraySize );
            }

            int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }

    return 0;
}
|
||||
768
test_conformance/images/kernel_read_write/test_write_3D.cpp
Normal file
768
test_conformance/images/kernel_read_write/test_write_3D.cpp
Normal file
@@ -0,0 +1,768 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../testBase.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#define MAX_ERR 0.005f
|
||||
|
||||
extern cl_command_queue queue;
|
||||
extern cl_context context;
|
||||
extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps;
|
||||
extern cl_filter_mode gFilterModeToSkip;
|
||||
extern cl_mem_flags gMemFlagsToUse;
|
||||
|
||||
extern int gtestTypesToRun;
|
||||
|
||||
extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z,
|
||||
ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
|
||||
|
||||
// Utility function to clamp down image sizes for certain tests to avoid
|
||||
// using too much memory.
|
||||
static size_t reduceImageSizeRange(size_t maxDimSize, MTdata& seed) {
|
||||
size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
|
||||
if (DimSize > (size_t) 128)
|
||||
return 128;
|
||||
else
|
||||
return DimSize;
|
||||
}
|
||||
|
||||
static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) {
|
||||
size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
|
||||
if (DimSize > (size_t) 32)
|
||||
return 32;
|
||||
else
|
||||
return DimSize;
|
||||
}
|
||||
|
||||
|
||||
// printf-style template for the kernel that writes a 3D image declared
// write_only. Six %s placeholders, in order: a leading prefix, the input
// element type (followed by "4"), an optional extra kernel parameter, the
// offset computation, the write_image suffix and an optional extra
// coordinate argument.
// NOTE(review): the code that fills this template is outside this excerpt;
// confirm the placeholder meanings against the sprintf call that uses it.
const char *write3DKernelSourcePattern =
    "%s" "__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output %s )\n"
    "{\n" " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
    "%s" " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
    "}";

// Same template as above, with the image declared read_write.
const char *readwrite3DKernelSourcePattern =
    "%s" "__kernel void sample_kernel( __global %s4 *input, read_write image3d_t output %s )\n"
    "{\n" " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
    "%s" " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
    "}";

// Pragma line that enables the cl_khr_3d_image_writes extension.
const char *khr3DWritesPragma = "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";

// Kernel fragment: linear index of the work-item into the input buffer, using
// the full image width and height.
const char *offset3DSource = " int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n";

// Kernel fragment: same index, but using the width and height of mip level
// "lod" (each shifted right by lod and clamped to at least 1).
const char *offset3DLodSource =
    " int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
    " int height_lod = ( get_image_height(output) >> lod ) ? ( get_image_height(output) >> lod ) : 1;\n"
    " int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n";
|
||||
|
||||
int test_write_image_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
|
||||
image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
|
||||
{
|
||||
int totalErrors = 0;
|
||||
|
||||
size_t num_flags = 0;
|
||||
const cl_mem_flags *mem_flag_types = NULL;
|
||||
const char * *mem_flag_names = NULL;
|
||||
const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
|
||||
const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
|
||||
const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
|
||||
const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
|
||||
|
||||
if(gtestTypesToRun & kWriteTests)
|
||||
{
|
||||
mem_flag_types = write_only_mem_flag_types;
|
||||
mem_flag_names = write_only_mem_flag_names;
|
||||
num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
|
||||
}
|
||||
else
|
||||
{
|
||||
mem_flag_types = read_write_mem_flag_types;
|
||||
mem_flag_names = read_write_mem_flag_names;
|
||||
num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
|
||||
}
|
||||
|
||||
size_t pixelSize = get_pixel_size( imageInfo->format );
|
||||
|
||||
for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
|
||||
{
|
||||
int error;
|
||||
size_t threads[3];
|
||||
bool verifyRounding = false;
|
||||
int totalErrors = 0;
|
||||
int forceCorrectlyRoundedWrites = 0;
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
// Require Apple's CPU implementation to be correctly rounded, not just within 0.6
|
||||
cl_device_type type = 0;
|
||||
if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
|
||||
{
|
||||
log_error("Error: Could not get device type for Apple device! (%d) \n", error );
|
||||
return 1;
|
||||
}
|
||||
if( type == CL_DEVICE_TYPE_CPU )
|
||||
forceCorrectlyRoundedWrites = 1;
|
||||
#endif
|
||||
|
||||
if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
|
||||
if( DetectFloatToHalfRoundingMode(queue) )
|
||||
return 1;
|
||||
|
||||
BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
|
||||
|
||||
create_random_image_data( inputType, imageInfo, imageValues, d );
|
||||
|
||||
if(!gTestMipmaps)
|
||||
{
|
||||
if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
|
||||
{
|
||||
/* Pilot data for sRGB images */
|
||||
if(is_sRGBA_order(imageInfo->format->image_channel_order))
|
||||
{
|
||||
// We want to generate ints (mostly) in range of the target format which should be [0,255]
|
||||
// However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
|
||||
// it can test some out-of-range data points
|
||||
const unsigned int test_range_ext = 16;
|
||||
int formatMin = 0 - test_range_ext;
|
||||
int formatMax = 255 + test_range_ext;
|
||||
int pixel_value = 0;
|
||||
|
||||
// First, fill with arbitrary floats
|
||||
for( size_t z = 0; z < imageInfo->depth; z++ )
|
||||
{
|
||||
for( size_t y = 0; y < imageInfo->height; y++ )
|
||||
{
|
||||
float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
|
||||
for( size_t i = 0; i < imageInfo->width * 4; i++ )
|
||||
{
|
||||
pixel_value = random_in_range( formatMin, (int)formatMax, d );
|
||||
inputValues[ i ] = (float)(pixel_value/255.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Throw a few extra test values in there
|
||||
float *inputValues = (float *)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
|
||||
// Piloting some debug inputs.
|
||||
inputValues[ i++ ] = -0.5f;
|
||||
inputValues[ i++ ] = 0.5f;
|
||||
inputValues[ i++ ] = 2.f;
|
||||
inputValues[ i++ ] = 0.5f;
|
||||
|
||||
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
|
||||
// is correct
|
||||
if( imageInfo->width > 12 )
|
||||
{
|
||||
float formatMax = (float)get_format_max_int( imageInfo->format );
|
||||
inputValues[ i++ ] = 4.0f / formatMax;
|
||||
inputValues[ i++ ] = 4.3f / formatMax;
|
||||
inputValues[ i++ ] = 4.5f / formatMax;
|
||||
inputValues[ i++ ] = 4.7f / formatMax;
|
||||
inputValues[ i++ ] = 5.0f / formatMax;
|
||||
inputValues[ i++ ] = 5.3f / formatMax;
|
||||
inputValues[ i++ ] = 5.5f / formatMax;
|
||||
inputValues[ i++ ] = 5.7f / formatMax;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// First, fill with arbitrary floats
|
||||
for( size_t z = 0; z < imageInfo->depth; z++ )
|
||||
{
|
||||
for( size_t y = 0; y < imageInfo->height; y++ )
|
||||
{
|
||||
float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
|
||||
for( size_t i = 0; i < imageInfo->width * 4; i++ )
|
||||
inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
|
||||
}
|
||||
}
|
||||
|
||||
// Throw a few extra test values in there
|
||||
float *inputValues = (float *)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
inputValues[ i++ ] = -0.0000000000009f;
|
||||
inputValues[ i++ ] = 1.f;
|
||||
inputValues[ i++ ] = -1.f;
|
||||
inputValues[ i++ ] = 2.f;
|
||||
|
||||
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
|
||||
// is correct
|
||||
if( imageInfo->width > 12 )
|
||||
{
|
||||
float formatMax = (float)get_format_max_int( imageInfo->format );
|
||||
inputValues[ i++ ] = 4.0f / formatMax;
|
||||
inputValues[ i++ ] = 4.3f / formatMax;
|
||||
inputValues[ i++ ] = 4.5f / formatMax;
|
||||
inputValues[ i++ ] = 4.7f / formatMax;
|
||||
inputValues[ i++ ] = 5.0f / formatMax;
|
||||
inputValues[ i++ ] = 5.3f / formatMax;
|
||||
inputValues[ i++ ] = 5.5f / formatMax;
|
||||
inputValues[ i++ ] = 5.7f / formatMax;
|
||||
verifyRounding = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( inputType == kUInt )
|
||||
{
|
||||
unsigned int *inputValues = (unsigned int*)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
inputValues[ i++ ] = 0;
|
||||
inputValues[ i++ ] = 65535;
|
||||
inputValues[ i++ ] = 7271820;
|
||||
inputValues[ i++ ] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Construct testing sources
|
||||
clProtectedImage protImage;
|
||||
clMemWrapper unprotImage;
|
||||
cl_mem image;
|
||||
|
||||
if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
|
||||
{
|
||||
create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
|
||||
|
||||
unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
|
||||
imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0,
|
||||
maxImageUseHostPtrBackingStore, &error );
|
||||
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
|
||||
return error;
|
||||
}
|
||||
|
||||
image = (cl_mem)unprotImage;
|
||||
}
|
||||
else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
|
||||
{
|
||||
// Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
|
||||
// it works just as if no flag is specified, so we just do the same thing either way
|
||||
// Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
|
||||
if(gTestMipmaps)
|
||||
{
|
||||
cl_image_desc image_desc = {0};
|
||||
image_desc.image_type = imageInfo->type;
|
||||
image_desc.num_mip_levels = imageInfo->num_mip_levels;
|
||||
image_desc.image_width = imageInfo->width;
|
||||
image_desc.image_height = imageInfo->height;
|
||||
image_desc.image_depth = imageInfo->depth;
|
||||
|
||||
unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
|
||||
imageInfo->format, &image_desc, NULL, &error);
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %ld x %ld *%ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, imageInfo->depth,
|
||||
IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
|
||||
imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, imageValues, &error );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
image = unprotImage;
|
||||
}
|
||||
|
||||
error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
size_t width_lod = imageInfo->width;
|
||||
size_t height_lod = imageInfo->height;
|
||||
size_t depth_lod = imageInfo->depth;
|
||||
size_t nextLevelOffset = 0;
|
||||
size_t origin[ 4 ] = { 0, 0, 0, 0 };
|
||||
size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth };
|
||||
|
||||
int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
|
||||
for( int lod = 0; lod < num_lod_loops; lod++)
|
||||
{
|
||||
if(gTestMipmaps)
|
||||
{
|
||||
error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
|
||||
}
|
||||
// Run the kernel
|
||||
threads[0] = (size_t)width_lod;
|
||||
threads[1] = (size_t)height_lod;
|
||||
threads[2] = (size_t)depth_lod;
|
||||
|
||||
clMemWrapper inputStream;
|
||||
|
||||
char *imagePtrOffset = imageValues + nextLevelOffset;
|
||||
inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
|
||||
get_explicit_type_size( inputType ) * 4 * width_lod * height_lod * depth_lod, imagePtrOffset, &error );
|
||||
test_error( error, "Unable to create input buffer" );
|
||||
|
||||
// Set arguments
|
||||
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "Unable to run kernel" );
|
||||
|
||||
// Get results
|
||||
size_t resultSize;
|
||||
if(gTestMipmaps)
|
||||
resultSize = width_lod * height_lod * depth_lod * pixelSize;
|
||||
else
|
||||
resultSize = imageInfo->slicePitch *imageInfo->depth;
|
||||
clProtectedArray PA(resultSize);
|
||||
char *resultValues = (char *)((void *)PA);
|
||||
|
||||
if( gDebugTrace )
|
||||
log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
|
||||
|
||||
origin[3] = lod;
|
||||
region[0] = width_lod;
|
||||
region[1] = height_lod;
|
||||
region[2] = depth_lod;
|
||||
error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results from kernel" );
|
||||
if( gDebugTrace )
|
||||
log_info( " results read\n" );
|
||||
|
||||
// Validate results element by element
|
||||
char *imagePtr = (char*)imageValues + nextLevelOffset;
|
||||
int numTries = 5;
|
||||
for( size_t z = 0, i = 0; z < depth_lod; z++ )
|
||||
{
|
||||
for( size_t y = 0; y < height_lod; y++ )
|
||||
{
|
||||
char *resultPtr;
|
||||
if( gTestMipmaps )
|
||||
resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize;
|
||||
else
|
||||
resultPtr = (char *)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;
|
||||
for( size_t x = 0; x < width_lod; x++, i++ )
|
||||
{
|
||||
char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
|
||||
|
||||
// Convert this pixel
|
||||
if( inputType == kFloat )
|
||||
pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
|
||||
else if( inputType == kInt )
|
||||
pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
|
||||
else // if( inputType == kUInt )
|
||||
pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
|
||||
|
||||
// Compare against the results
|
||||
if(is_sRGBA_order(imageInfo->format->image_channel_order))
|
||||
{
|
||||
// Compare sRGB-mapped values
|
||||
cl_float expected[4] = {0};
|
||||
cl_float* input_values = (float*)imagePtr;
|
||||
cl_uchar *actual = (cl_uchar*)resultPtr;
|
||||
float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
|
||||
float err[4] = {0.0f};
|
||||
|
||||
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
{
|
||||
if(j < 3)
|
||||
{
|
||||
expected[j] = sRGBmap(input_values[j]);
|
||||
}
|
||||
else // there is no sRGB conversion for alpha component if it exists
|
||||
{
|
||||
expected[j] = NORMALIZE(input_values[j], 255.0f);
|
||||
}
|
||||
|
||||
err[j] = fabsf( expected[ j ] - actual[ j ] );
|
||||
}
|
||||
|
||||
if ((err[0] > max_err) ||
|
||||
(err[1] > max_err) ||
|
||||
(err[2] > max_err) ||
|
||||
(err[3] > FLT_EPSILON)) // there is no conversion for alpha
|
||||
{
|
||||
log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
|
||||
log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
|
||||
log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
|
||||
log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
|
||||
{
|
||||
// Compare floats
|
||||
float *expected = (float *)resultBuffer;
|
||||
float *actual = (float *)resultPtr;
|
||||
float err = 0.f;
|
||||
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
|
||||
|
||||
err /= (float)get_format_channel_count( imageInfo->format );
|
||||
if( err > MAX_ERR )
|
||||
{
|
||||
unsigned int *e = (unsigned int *)expected;
|
||||
unsigned int *a = (unsigned int *)actual;
|
||||
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Error: %g\n", err );
|
||||
log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
|
||||
log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
|
||||
log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
|
||||
log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
|
||||
totalErrors++;
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
|
||||
{
|
||||
// Compare half floats
|
||||
if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
|
||||
{
|
||||
cl_ushort *e = (cl_ushort *)resultBuffer;
|
||||
cl_ushort *a = (cl_ushort *)resultPtr;
|
||||
int err_cnt = 0;
|
||||
|
||||
//Fix up cases where we have NaNs
|
||||
for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
{
|
||||
if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
|
||||
continue;
|
||||
if( e[j] != a[j] )
|
||||
err_cnt++;
|
||||
}
|
||||
|
||||
if( err_cnt )
|
||||
{
|
||||
totalErrors++;
|
||||
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
|
||||
unsigned short *e = (unsigned short *)resultBuffer;
|
||||
unsigned short *a = (unsigned short *)resultPtr;
|
||||
log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
|
||||
log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
|
||||
if( inputType == kFloat )
|
||||
{
|
||||
float *p = (float *)(char *)imagePtr;
|
||||
log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
}
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Exact result passes every time
|
||||
if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
|
||||
{
|
||||
// result is inexact. Calculate error
|
||||
int failure = 1;
|
||||
float errors[4] = {NAN, NAN, NAN, NAN};
|
||||
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
|
||||
|
||||
// We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
|
||||
if( 0 == forceCorrectlyRoundedWrites &&
|
||||
(
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
|
||||
imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
|
||||
imageInfo->format->image_channel_data_type == CL_SNORM_INT16
|
||||
))
|
||||
{
|
||||
if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
|
||||
! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
|
||||
failure = 0;
|
||||
}
|
||||
|
||||
|
||||
if( failure )
|
||||
{
|
||||
totalErrors++;
|
||||
// Is it our special rounding test?
|
||||
if( verifyRounding && i >= 1 && i <= 2 )
|
||||
{
|
||||
// Try to guess what the rounding mode of the device really is based on what it returned
|
||||
const char *deviceRounding = "unknown";
|
||||
unsigned int deviceResults[8];
|
||||
read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
|
||||
read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
|
||||
|
||||
if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
|
||||
deviceRounding = "truncate";
|
||||
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
|
||||
deviceRounding = "round to nearest";
|
||||
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
|
||||
deviceRounding = "round to even";
|
||||
|
||||
log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
|
||||
deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
|
||||
log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
|
||||
return 1;
|
||||
}
|
||||
log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
|
||||
switch(imageInfo->format->image_channel_data_type)
|
||||
{
|
||||
case CL_UNORM_INT8:
|
||||
case CL_SNORM_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
|
||||
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_UNORM_INT16:
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_SIGNED_INT16:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
|
||||
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_HALF_FLOAT:
|
||||
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
|
||||
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_SIGNED_INT32:
|
||||
log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
|
||||
break;
|
||||
case CL_FLOAT:
|
||||
log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
|
||||
log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
|
||||
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
}
|
||||
|
||||
float *v = (float *)(char *)imagePtr;
|
||||
log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
imagePtr += get_explicit_type_size( inputType ) * 4;
|
||||
resultPtr += get_pixel_size( imageInfo->format );
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
nextLevelOffset += width_lod * height_lod * depth_lod * pixelSize;
|
||||
width_lod = ( width_lod >> 1 ) ? ( width_lod >> 1 ) : 1;
|
||||
height_lod = ( height_lod >> 1 ) ? ( height_lod >> 1 ) : 1;
|
||||
depth_lod = ( depth_lod >> 1 ) ? ( depth_lod >> 1 ) : 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
// All done!
|
||||
return totalErrors;
|
||||
}
|
||||
|
||||
|
||||
// Builds the 3D image write kernel for one image format / input type pair and
// runs test_write_image_3D over a set of image sizes selected by the global
// test-mode flags:
//   gTestSmallImages - every width 1..12, height 1..8, depth 2..6
//   gTestMaxImages   - the sizes reported by get_max_sizes()
//   gTestRounding    - a single image whose texel count spans the channel type's range
//   otherwise        - NUM_IMAGE_ITERATIONS randomly sized images that fit the
//                      device's allocation limits
//
// Parameters:
//   device    - device whose 3D image limits and memory sizes are queried
//   format    - image format under test (stored in the image descriptor)
//   inputType - host-side element type; kInt and kUInt select the "i"/"ui"
//               write_image suffix, anything else selects "f"
//   d         - random number generator state handed to the size helpers
//
// Returns 0 when every size passes, otherwise the first non-zero code returned
// by test_write_image_3D. Failures reported through test_error() are handled by
// that macro (presumably an early return with the error - confirm in the harness).
int test_write_image_3D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
{
    char programSrc[10240];
    const char *ptr;
    const char *readFormat;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *KernelSourcePattern = NULL;

    int error;

    // Get our operating parameters
    size_t maxWidth, maxHeight, maxDepth;
    cl_ulong maxAllocSize, memSize;

    image_descriptor imageInfo = { 0x0 };

    imageInfo.format = format;
    imageInfo.type = CL_MEM_OBJECT_IMAGE3D;

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );

    // The host cannot address more than SIZE_MAX bytes, so cap the device figure.
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }

    // Determine types
    if( inputType == kInt )
        readFormat = "i";
    else if( inputType == kUInt )
        readFormat = "ui";
    else // kFloat
        readFormat = "f";

    if(gtestTypesToRun & kWriteTests)
    {
        KernelSourcePattern = write3DKernelSourcePattern;
    }
    else
    {
        KernelSourcePattern = readwrite3DKernelSourcePattern;
    }

    // Construct the source
    sprintf( programSrc,
             KernelSourcePattern,
             gTestMipmaps ? "" : khr3DWritesPragma,
             get_explicit_type_name( inputType ),
             gTestMipmaps ? ", int lod" : "",
             gTestMipmaps ? offset3DLodSource : offset3DSource,
             readFormat,
             gTestMipmaps ? ", lod" : "" );

    ptr = programSrc;
    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
    test_error( error, "Unable to create testing kernel" );

    // Run tests
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
            {
                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
                for( imageInfo.depth = 2; imageInfo.depth < 7; imageInfo.depth++ )
                {
                    if (gTestMipmaps)
                        imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);

                    if( gDebugTrace )
                        log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
                    int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
                    if( retCode )
                        return retCode;
                }
            }
        }
    }
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
        size_t numbeOfSizes;
        size_t sizes[100][3];

        get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format, CL_TRUE);

        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
        {
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.height = sizes[ idx ][ 1 ];
            imageInfo.depth = sizes[ idx ][ 2 ];
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
            if (gTestMipmaps)
                imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
            log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth);
            int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestRounding )
    {
        // Number of distinct values of one channel. Computed in 64 bits: shifting
        // a plain int by 32 (4-byte channel types) is undefined behaviour and
        // could leave height at 0, making the width division below divide by zero.
        cl_ulong typeRange = (cl_ulong)1 << ( get_format_type_size( imageInfo.format ) * 8 );
        imageInfo.height = (size_t)( typeRange / 256 );
        imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
        imageInfo.depth = 1;

        imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
        int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
        if( retCode )
            return retCode;
    }
    else
    {
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
                imageInfo.width = reduceImageSizeRange(maxWidth, d );
                imageInfo.height = reduceImageSizeRange(maxHeight, d );
                imageInfo.depth = reduceImageDepth(maxDepth, d );

                if(gTestMipmaps)
                {
                    imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
                    // Need to take into account the input buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc.
                    // Widen before multiplying so the estimate cannot wrap where size_t is 32 bits.
                    size = (cl_ulong)compute_mipmapped_image_size(imageInfo) * 4 * get_explicit_type_size( inputType );
                }
                else
                {
                    imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
                    imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
                    if( gEnablePitch )
                    {
                        // Pad each row by a random number of extra pixels...
                        size_t extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );

                        // ...and each slice by a random number of extra rows.
                        imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
                        extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
                    }

                    // 64-bit product: a wrapped size_t result could slip past the limit checks below.
                    size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4;
                }
            } while( size > maxAllocSize || ( size * 3 ) > memSize );

            if( gDebugTrace )
                log_info( " at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", (long)imageInfo.width, (long)imageInfo.height, (long)imageInfo.depth,
                          (long)imageInfo.rowPitch, (long)imageInfo.slicePitch, (long)maxWidth, (long)maxHeight, (long)maxDepth );

            int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }

    return 0;
}
|
||||
887
test_conformance/images/kernel_read_write/test_write_image.cpp
Normal file
887
test_conformance/images/kernel_read_write/test_write_image.cpp
Normal file
@@ -0,0 +1,887 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../testBase.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#define MAX_ERR 0.005f
|
||||
|
||||
extern cl_command_queue queue;
|
||||
extern cl_context context;
|
||||
extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestImage2DFromBuffer, gTestMipmaps;
|
||||
extern cl_filter_mode gFilterModeToSkip;
|
||||
extern cl_mem_flags gMemFlagsToUse;
|
||||
extern int gtestTypesToRun;
|
||||
|
||||
extern int test_write_image_1D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
|
||||
extern int test_write_image_3D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
|
||||
extern int test_write_image_1D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
|
||||
extern int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d );
|
||||
|
||||
|
||||
// printf-style template for an OpenCL C kernel named sample_kernel that takes a
// __global input pointer and a write_only image, and has each work-item write
// input[ offset ] to the pixel at ( tidX, tidY ).
// It contains seven %s slots, in order: two adjacent slots in front of "*input"
// (together forming the element type text), the image type after write_only,
// trailing text inside the parameter list, a block of statements that has to
// declare `offset`, the write_image suffix, and extra text following the
// coordinate argument. The code that fills the slots is not in this part of
// the file.
const char *writeKernelSourcePattern =
|
||||
"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n"
|
||||
"{\n"
|
||||
" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
|
||||
"%s"
|
||||
" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n"
|
||||
"}";
|
||||
|
||||
// Same kernel template as the write-only pattern, except that the image
// parameter is qualified read_write. The seven %s slots appear in the same
// order: two in front of "*input", the image type, trailing parameter text,
// the statements that declare `offset`, the write_image suffix, and extra text
// after the coordinate argument.
const char *read_writeKernelSourcePattern =
|
||||
"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n"
|
||||
"{\n"
|
||||
" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
|
||||
"%s"
|
||||
" write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ] );\n"
|
||||
"}";
|
||||
|
||||
// Kernel source fragment that declares `offset` as the row-major index of the
// work-item's pixel: tidY * image width + tidX.
// Presumably substituted into the stand-alone "%s" line of the kernel patterns
// when mipmaps are not being tested - confirm at the call site.
const char *offset2DKernelSource =
|
||||
" int offset = tidY*get_image_width(output) + tidX;\n";
|
||||
|
||||
// Mip-level variant of the offset fragment: derives width_lod as the image
// width shifted right by `lod` (falling back to 1 when the shift yields 0) and
// uses it as the row stride for `offset`.
// The fragment references `lod`, so the kernel it is pasted into must have
// that name in scope.
const char *offset2DLodKernelSource =
|
||||
" int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
|
||||
" int offset = tidY * width_lod + tidX;\n";
|
||||
|
||||
int test_write_image( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
|
||||
image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
|
||||
{
|
||||
int totalErrors = 0;
|
||||
size_t num_flags = 0;
|
||||
const cl_mem_flags *mem_flag_types = NULL;
|
||||
const char * *mem_flag_names = NULL;
|
||||
const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
|
||||
const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
|
||||
const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
|
||||
const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
|
||||
|
||||
if(gtestTypesToRun & kWriteTests)
|
||||
{
|
||||
mem_flag_types = write_only_mem_flag_types;
|
||||
mem_flag_names = write_only_mem_flag_names;
|
||||
num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
|
||||
}
|
||||
else
|
||||
{
|
||||
mem_flag_types = read_write_mem_flag_types;
|
||||
mem_flag_names = read_write_mem_flag_names;
|
||||
num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
|
||||
}
|
||||
|
||||
size_t pixelSize = get_pixel_size( imageInfo->format );
|
||||
int channel_scale = (imageInfo->format->image_channel_order == CL_DEPTH) ? 1 : 4;
|
||||
|
||||
for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
|
||||
{
|
||||
int error;
|
||||
size_t threads[2];
|
||||
bool verifyRounding = false;
|
||||
int totalErrors = 0;
|
||||
int forceCorrectlyRoundedWrites = 0;
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
// Require Apple's CPU implementation to be correctly rounded, not just within 0.6
|
||||
cl_device_type type = 0;
|
||||
if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
|
||||
{
|
||||
log_error("Error: Could not get device type for Apple device! (%d) \n", error );
|
||||
return 1;
|
||||
}
|
||||
if( type == CL_DEVICE_TYPE_CPU )
|
||||
forceCorrectlyRoundedWrites = 1;
|
||||
#endif
|
||||
|
||||
if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
|
||||
if( DetectFloatToHalfRoundingMode(queue) )
|
||||
return 1;
|
||||
|
||||
BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues, imageBufferValues;
|
||||
|
||||
create_random_image_data( inputType, imageInfo, imageValues, d, gTestImage2DFromBuffer );
|
||||
|
||||
if(!gTestMipmaps)
|
||||
{
|
||||
if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
|
||||
{
|
||||
/* Pilot data for sRGB images */
|
||||
if(is_sRGBA_order(imageInfo->format->image_channel_order))
|
||||
{
|
||||
// We want to generate ints (mostly) in range of the target format which should be [0,255]
|
||||
// However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
|
||||
// it can test some out-of-range data points
|
||||
const unsigned int test_range_ext = 16;
|
||||
int formatMin = 0 - test_range_ext;
|
||||
int formatMax = 255 + test_range_ext;
|
||||
int pixel_value = 0;
|
||||
|
||||
// First, fill with arbitrary floats
|
||||
for( size_t y = 0; y < imageInfo->height; y++ )
|
||||
{
|
||||
float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4;
|
||||
for( size_t i = 0; i < imageInfo->width * 4; i++ )
|
||||
{
|
||||
pixel_value = random_in_range( formatMin, (int)formatMax, d );
|
||||
inputValues[ i ] = (float)(pixel_value/255.0f);
|
||||
}
|
||||
}
|
||||
|
||||
// Throw a few extra test values in there
|
||||
float *inputValues = (float *)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
|
||||
// Piloting some debug inputs.
|
||||
inputValues[ i++ ] = -0.5f;
|
||||
inputValues[ i++ ] = 0.5f;
|
||||
inputValues[ i++ ] = 2.0f;
|
||||
inputValues[ i++ ] = 0.5f;
|
||||
|
||||
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
|
||||
// is correct
|
||||
if( imageInfo->width > 12 )
|
||||
{
|
||||
float formatMax = (float)get_format_max_int( imageInfo->format );
|
||||
inputValues[ i++ ] = 4.0f / formatMax;
|
||||
inputValues[ i++ ] = 4.3f / formatMax;
|
||||
inputValues[ i++ ] = 4.5f / formatMax;
|
||||
inputValues[ i++ ] = 4.7f / formatMax;
|
||||
inputValues[ i++ ] = 5.0f / formatMax;
|
||||
inputValues[ i++ ] = 5.3f / formatMax;
|
||||
inputValues[ i++ ] = 5.5f / formatMax;
|
||||
inputValues[ i++ ] = 5.7f / formatMax;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// First, fill with arbitrary floats
|
||||
for( size_t y = 0; y < imageInfo->height; y++ )
|
||||
{
|
||||
float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * channel_scale;
|
||||
for( size_t i = 0; i < imageInfo->width * channel_scale; i++ )
|
||||
inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
|
||||
}
|
||||
|
||||
// Throw a few extra test values in there
|
||||
float *inputValues = (float *)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
inputValues[ i++ ] = -0.0000000000009f;
|
||||
inputValues[ i++ ] = 1.f;
|
||||
inputValues[ i++ ] = -1.f;
|
||||
inputValues[ i++ ] = 2.f;
|
||||
|
||||
// Also fill in the first few vectors with some deliberate tests to determine the rounding mode
|
||||
// is correct
|
||||
if( imageInfo->width > 12 )
|
||||
{
|
||||
float formatMax = (float)get_format_max_int( imageInfo->format );
|
||||
inputValues[ i++ ] = 4.0f / formatMax;
|
||||
inputValues[ i++ ] = 4.3f / formatMax;
|
||||
inputValues[ i++ ] = 4.5f / formatMax;
|
||||
inputValues[ i++ ] = 4.7f / formatMax;
|
||||
inputValues[ i++ ] = 5.0f / formatMax;
|
||||
inputValues[ i++ ] = 5.3f / formatMax;
|
||||
inputValues[ i++ ] = 5.5f / formatMax;
|
||||
inputValues[ i++ ] = 5.7f / formatMax;
|
||||
verifyRounding = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( inputType == kUInt )
|
||||
{
|
||||
unsigned int *inputValues = (unsigned int*)(char*)imageValues;
|
||||
size_t i = 0;
|
||||
inputValues[ i++ ] = 0;
|
||||
inputValues[ i++ ] = 65535;
|
||||
inputValues[ i++ ] = 7271820;
|
||||
inputValues[ i++ ] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Construct testing sources
|
||||
clProtectedImage protImage;
|
||||
clMemWrapper unprotImage;
|
||||
cl_mem image;
|
||||
cl_mem imageBuffer;
|
||||
|
||||
if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
|
||||
{
|
||||
if (gTestImage2DFromBuffer)
|
||||
{
|
||||
imageBuffer = clCreateBuffer( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR,
|
||||
imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
|
||||
test_error( error, "Unable to create buffer" );
|
||||
unprotImage = create_image_2d_buffer( context, mem_flag_types[mem_flag_index], imageInfo->format,
|
||||
imageInfo->width, imageInfo->height, imageInfo->rowPitch,
|
||||
imageBuffer, &error );
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
// clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
|
||||
// Do not use protected images for max image size test since it rounds the row size to a page size
|
||||
if (gTestMaxImages) {
|
||||
create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
|
||||
|
||||
unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
|
||||
imageInfo->width, imageInfo->height, 0,
|
||||
maxImageUseHostPtrBackingStore, &error );
|
||||
} else {
|
||||
error = protImage.Create( context, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, imageInfo->height );
|
||||
}
|
||||
}
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
if (gTestImage2DFromBuffer) {
|
||||
clReleaseMemObject(imageBuffer);
|
||||
if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
|
||||
log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height,
|
||||
imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
|
||||
if (gTestMaxImages || gTestImage2DFromBuffer)
|
||||
image = (cl_mem)unprotImage;
|
||||
else
|
||||
image = (cl_mem)protImage;
|
||||
}
|
||||
else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
|
||||
{
|
||||
if( gTestMipmaps )
|
||||
{
|
||||
cl_image_desc image_desc = {0};
|
||||
image_desc.image_type = imageInfo->type;
|
||||
image_desc.num_mip_levels = imageInfo->num_mip_levels;
|
||||
image_desc.image_width = imageInfo->width;
|
||||
image_desc.image_height = imageInfo->height;
|
||||
|
||||
unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
|
||||
imageInfo->format, &image_desc, NULL, &error);
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create %d level 2D image of size %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height,
|
||||
IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
else if (gTestImage2DFromBuffer)
|
||||
{
|
||||
generate_random_image_data( imageInfo, imageBufferValues, d );
|
||||
imageBuffer = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR,
|
||||
imageInfo->rowPitch * imageInfo->height, imageBufferValues, &error);
|
||||
test_error( error, "Unable to create buffer" );
|
||||
unprotImage = create_image_2d_buffer( context, mem_flag_types[mem_flag_index], imageInfo->format,
|
||||
imageInfo->width, imageInfo->height, imageInfo->rowPitch,
|
||||
imageBuffer, &error );
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
// Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
|
||||
// it works just as if no flag is specified, so we just do the same thing either way
|
||||
// Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
|
||||
unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
|
||||
imageInfo->width, imageInfo->height, 0,
|
||||
imageValues, &error );
|
||||
}
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
if (gTestImage2DFromBuffer) {
|
||||
clReleaseMemObject(imageBuffer);
|
||||
if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
|
||||
log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height,
|
||||
imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
|
||||
return error;
|
||||
}
|
||||
image = unprotImage;
|
||||
}
|
||||
|
||||
error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
size_t width_lod = imageInfo->width, height_lod = imageInfo->height, nextLevelOffset = 0;
|
||||
size_t origin[ 3 ] = { 0, 0, 0 };
|
||||
size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
|
||||
size_t resultSize;
|
||||
|
||||
int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
|
||||
for( int lod = 0; lod < num_lod_loops; lod++)
|
||||
{
|
||||
if(gTestMipmaps)
|
||||
{
|
||||
error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
|
||||
}
|
||||
// Run the kernel
|
||||
threads[0] = (size_t)width_lod;
|
||||
threads[1] = (size_t)height_lod;
|
||||
|
||||
clMemWrapper inputStream;
|
||||
|
||||
char *imagePtrOffset = imageValues + nextLevelOffset;
|
||||
|
||||
inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
|
||||
get_explicit_type_size( inputType ) * channel_scale * width_lod * height_lod, imagePtrOffset, &error );
|
||||
test_error( error, "Unable to create input buffer" );
|
||||
|
||||
// Set arguments
|
||||
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "Unable to run kernel" );
|
||||
|
||||
// Get results
|
||||
if( gTestMipmaps )
|
||||
resultSize = width_lod * height_lod * get_pixel_size(imageInfo->format);
|
||||
else
|
||||
resultSize = imageInfo->rowPitch * imageInfo->height;
|
||||
clProtectedArray PA(resultSize);
|
||||
char *resultValues = (char *)((void *)PA);
|
||||
|
||||
if( gDebugTrace )
|
||||
log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
|
||||
|
||||
origin[2] = lod;
|
||||
region[0] = width_lod;
|
||||
region[1] = height_lod;
|
||||
error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, 0, resultValues, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results from kernel" );
|
||||
if( gDebugTrace )
|
||||
log_info( " results read\n" );
|
||||
|
||||
// Validate results element by element
|
||||
char *imagePtr = (char*)imageValues + nextLevelOffset;
|
||||
int numTries = 5;
|
||||
for( size_t y = 0, i = 0; y < height_lod; y++ )
|
||||
{
|
||||
char *resultPtr;
|
||||
if( gTestMipmaps )
|
||||
resultPtr = (char *)resultValues + y * width_lod * pixelSize;
|
||||
else
|
||||
resultPtr = (char*)resultValues + y * imageInfo->rowPitch;
|
||||
for( size_t x = 0; x < width_lod; x++, i++ )
|
||||
{
|
||||
char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
|
||||
|
||||
// Convert this pixel
|
||||
if( inputType == kFloat )
|
||||
pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
|
||||
else if( inputType == kInt )
|
||||
pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
|
||||
else // if( inputType == kUInt )
|
||||
pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
|
||||
|
||||
// Compare against the results
|
||||
if(is_sRGBA_order(imageInfo->format->image_channel_order))
|
||||
{
|
||||
// Compare sRGB-mapped values
|
||||
cl_float expected[4] = {0};
|
||||
cl_float* input_values = (float*)imagePtr;
|
||||
cl_uchar *actual = (cl_uchar*)resultPtr;
|
||||
float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
|
||||
float err[4] = {0.0f};
|
||||
|
||||
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
{
|
||||
if(j < 3)
|
||||
{
|
||||
expected[j] = sRGBmap(input_values[j]);
|
||||
}
|
||||
else // there is no sRGB conversion for alpha component if it exists
|
||||
{
|
||||
expected[j] = NORMALIZE(input_values[j], 255.0f);
|
||||
}
|
||||
|
||||
err[j] = fabsf( expected[ j ] - actual[ j ] );
|
||||
}
|
||||
|
||||
if ((err[0] > max_err) ||
|
||||
(err[1] > max_err) ||
|
||||
(err[2] > max_err) ||
|
||||
(err[3] > 0)) // there is no conversion for alpha so the error should be zero
|
||||
{
|
||||
log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
|
||||
log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
|
||||
log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
|
||||
log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
|
||||
{
|
||||
// Compare floats
|
||||
float *expected = (float *)resultBuffer;
|
||||
float *actual = (float *)resultPtr;
|
||||
float err = 0.f;
|
||||
for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] );
|
||||
|
||||
err /= (float)get_format_channel_count( imageInfo->format );
|
||||
if( err > MAX_ERR )
|
||||
{
|
||||
unsigned int *e = (unsigned int *)expected;
|
||||
unsigned int *a = (unsigned int *)actual;
|
||||
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Error: %g\n", err );
|
||||
log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
|
||||
log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
|
||||
log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
|
||||
log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
|
||||
totalErrors++;
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
|
||||
{
|
||||
|
||||
// Compare half floats
|
||||
if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 )
|
||||
{
|
||||
cl_ushort *e = (cl_ushort *)resultBuffer;
|
||||
cl_ushort *a = (cl_ushort *)resultPtr;
|
||||
int err_cnt = 0;
|
||||
|
||||
//Fix up cases where we have NaNs
|
||||
for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
|
||||
{
|
||||
if( is_half_nan( e[j] ) && is_half_nan(a[j]) )
|
||||
continue;
|
||||
if( e[j] != a[j] )
|
||||
err_cnt++;
|
||||
}
|
||||
|
||||
if( err_cnt )
|
||||
{
|
||||
totalErrors++;
|
||||
log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] );
|
||||
log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] );
|
||||
if( inputType == kFloat )
|
||||
{
|
||||
float *p = (float *)(char *)imagePtr;
|
||||
log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
}
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Exact result passes every time
|
||||
if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
|
||||
{
|
||||
// result is inexact. Calculate error
|
||||
int failure = 1;
|
||||
float errors[4] = {NAN, NAN, NAN, NAN};
|
||||
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
|
||||
|
||||
// We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
|
||||
if( 0 == forceCorrectlyRoundedWrites &&
|
||||
(
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
|
||||
imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
|
||||
imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
|
||||
imageInfo->format->image_channel_data_type == CL_SNORM_INT16
|
||||
))
|
||||
{
|
||||
if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
|
||||
! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
|
||||
failure = 0;
|
||||
}
|
||||
|
||||
|
||||
if( failure )
|
||||
{
|
||||
totalErrors++;
|
||||
// Is it our special rounding test?
|
||||
if( verifyRounding && i >= 1 && i <= 2 )
|
||||
{
|
||||
// Try to guess what the rounding mode of the device really is based on what it returned
|
||||
const char *deviceRounding = "unknown";
|
||||
unsigned int deviceResults[8];
|
||||
read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
|
||||
read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
|
||||
|
||||
if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
|
||||
deviceRounding = "truncate";
|
||||
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
|
||||
deviceRounding = "round to nearest";
|
||||
else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
|
||||
deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
|
||||
deviceRounding = "round to even";
|
||||
|
||||
log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
|
||||
log_error( " Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
|
||||
deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
|
||||
log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
|
||||
return 1;
|
||||
}
|
||||
log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
|
||||
switch(imageInfo->format->image_channel_data_type)
|
||||
{
|
||||
case CL_UNORM_INT8:
|
||||
case CL_SNORM_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_UNORM_INT_101010:
|
||||
log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
|
||||
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_UNORM_INT16:
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_SIGNED_INT16:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
|
||||
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_HALF_FLOAT:
|
||||
log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
|
||||
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_SIGNED_INT32:
|
||||
log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
|
||||
log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
|
||||
break;
|
||||
case CL_FLOAT:
|
||||
log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
|
||||
log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
|
||||
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
|
||||
break;
|
||||
}
|
||||
|
||||
float *v = (float *)(char *)imagePtr;
|
||||
log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );
|
||||
|
||||
if( ( --numTries ) == 0 )
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
imagePtr += get_explicit_type_size( inputType ) * channel_scale;
|
||||
resultPtr += get_pixel_size( imageInfo->format );
|
||||
}
|
||||
}
|
||||
{
|
||||
nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format);
|
||||
width_lod = (width_lod >> 1) ?(width_lod >> 1) : 1;
|
||||
height_lod = (height_lod >> 1) ?(height_lod >> 1) : 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
|
||||
}
|
||||
|
||||
|
||||
// All done!
|
||||
return totalErrors;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Builds the 2D image write (or read_write) kernel for one image format and
 * host input type, then runs test_write_image over a series of image sizes.
 * Which sizes are used depends on the global test-mode flags: small images,
 * device maximum sizes, the rounding test, or NUM_IMAGE_ITERATIONS random sizes.
 *
 * device    - device whose image and memory limits are queried
 * format    - image format under test
 * inputType - element type of the host input data (kInt, kUInt, otherwise float)
 * d         - random generator state used when choosing sizes and mip counts
 *
 * Returns 0 when every tested size passes or the combination is skipped;
 * otherwise the first non-zero value reported by test_write_image.
 * NOTE(review): test_error is defined outside this chunk; it is assumed to
 * return from the function when its error argument is non-zero.
 */
int test_write_image_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d )
{
    char programSrc[10240];
    const char *ptr;
    const char *readFormat;
    clProgramWrapper program;
    clKernelWrapper kernel;
    const char *KernelSourcePattern = NULL;
    int error;

    // Three-channel formats with these data types are skipped when the image is created from a buffer
    if (gTestImage2DFromBuffer)
    {
        if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
        {
            switch (format->image_channel_data_type)
            {
                case CL_UNORM_INT8:
                case CL_UNORM_INT16:
                case CL_SNORM_INT8:
                case CL_SNORM_INT16:
                case CL_HALF_FLOAT:
                case CL_FLOAT:
                case CL_SIGNED_INT8:
                case CL_SIGNED_INT16:
                case CL_SIGNED_INT32:
                case CL_UNSIGNED_INT8:
                case CL_UNSIGNED_INT16:
                case CL_UNSIGNED_INT32:
                    log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
                              GetChannelTypeName( format->image_channel_data_type ));
                    return 0;
                default:
                    break;
            }
        }
    }

    // Get our operating parameters
    size_t maxWidth, maxHeight;
    cl_ulong maxAllocSize, memSize;

    image_descriptor imageInfo = { 0x0 };

    imageInfo.format = format;
    imageInfo.slicePitch = imageInfo.arraySize = imageInfo.depth = 0;
    imageInfo.type = CL_MEM_OBJECT_IMAGE2D;

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );

    // Never plan for more memory than the host can address
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }

    // Determine types
    if( inputType == kInt )
        readFormat = "i";
    else if( inputType == kUInt )
        readFormat = "ui";
    else // kFloat
        readFormat = "f";

    if(gtestTypesToRun & kWriteTests)
    {
        KernelSourcePattern = writeKernelSourcePattern;
    }
    else
    {
        KernelSourcePattern = read_writeKernelSourcePattern;
    }

    // Construct the source
    sprintf( programSrc,
             KernelSourcePattern,
             get_explicit_type_name( inputType ),
             (format->image_channel_order == CL_DEPTH) ? "" : "4",
             (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t",
             gTestMipmaps ? ", int lod" : "",
             gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource,
             readFormat,
             gTestMipmaps ? ", lod" : "" );

    ptr = programSrc;
    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
    test_error( error, "Unable to create testing kernel" );

    // Run tests
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
            {
                if( gTestMipmaps )
                    imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);

                if( gDebugTrace )
                    log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
                int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
                if( retCode )
                    return retCode;
            }
        }
    }
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
        size_t numberOfSizes;
        size_t sizes[100][3];

        get_max_sizes(&numberOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);

        for( size_t idx = 0; idx < numberOfSizes; idx++ )
        {
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.height = sizes[ idx ][ 1 ];
            imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
            if( gTestMipmaps )
                imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);
            log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.height);
            int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }
    else if( gTestRounding )
    {
        // One texel per representable channel value, laid out 256 wide.
        // The range is computed in 64 bits: shifting a plain int by 32 or more
        // is undefined and previously led to a zero height and a division by zero.
        const size_t typeBits = get_format_type_size( imageInfo.format ) * 8;
        const cl_ulong typeRange = ( typeBits < 64 ) ? ( (cl_ulong)1 << typeBits ) : 0;
        const cl_ulong rows = typeRange / 256;

        if( rows == 0 || rows > (cl_ulong)maxHeight )
        {
            // Only reachable for type sizes the original arithmetic could not handle either
            log_info( "Skipping rounding test: a %d-bit channel type cannot be laid out within the device's 2D image limits\n", (int)typeBits );
            return 0;
        }

        imageInfo.height = (size_t)rows;
        imageInfo.width = (size_t)( typeRange / rows );

        imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
        int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
        if( retCode )
            return retCode;
    }
    else
    {
        cl_uint imagePitchAlign = 0;
        if (gTestImage2DFromBuffer)
        {
#if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
            error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
#endif
            // The fallback must apply even when the query is compiled out,
            // otherwise the row pitch rounding below divides by zero.
            if (!imagePitchAlign)
                imagePitchAlign = 1;
            test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
        }

        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );

                if(gTestMipmaps)
                {
                    imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1, d);
                    size = 4 * compute_mipmapped_image_size(imageInfo);
                }
                else
                {
                    imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
                    if( gEnablePitch )
                    {
                        size_t extraWidth = (int)random_log_in_range( 0, 64, d );
                        imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
                    }

                    // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT
                    if (gTestImage2DFromBuffer)
                    {
                        size_t pitch = imagePitchAlign * get_pixel_size( imageInfo.format );
                        imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
                    }

                    // Multiply in 64 bits so the byte count cannot wrap where size_t is 32 bits wide
                    size = (cl_ulong)imageInfo.rowPitch * (cl_ulong)imageInfo.height * 4;
                }
            } while( size > maxAllocSize || ( size * 3 ) > memSize );

            if( gDebugTrace )
                log_info( " at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );

            int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
            if( retCode )
                return retCode;
        }
    }

    return 0;
}
|
||||
|
||||
/*
 * Runs the write_image test for every entry of formatList that is not
 * filtered out, dispatching to the per-image-type test function.
 *
 * device       - device under test
 * formatList   - candidate image formats
 * filterFlags  - parallel to formatList; a true entry causes that format to be skipped
 * numFormats   - number of entries in formatList and filterFlags
 * imageSampler - only its filter_mode is read here; CL_FILTER_LINEAR makes the call a no-op
 * inputType    - element type of the host input data
 * imageType    - selects the 1D, 2D, 3D, 1D-array or 2D-array test
 *
 * Returns the sum of the per-format return codes (0 when nothing failed).
 * sRGB formats are skipped when cl_khr_srgb_image_writes is not available.
 */
int test_write_image_formats( cl_device_id device, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
                              image_sampler_data *imageSampler, ExplicitType inputType, cl_mem_object_type imageType )
{
    if( imageSampler->filter_mode == CL_FILTER_LINEAR )
        // No need to run for linear filters
        return 0;

    int ret = 0;

    log_info( "write_image (%s input) *****************************\n", get_explicit_type_name( inputType ) );

    RandomSeed seed( gRandomSeed );

    for( unsigned int i = 0; i < numFormats; i++ )
    {
        cl_image_format &imageFormat = formatList[ i ];

        if( filterFlags[ i ] )
            continue;

        if (is_sRGBA_order(imageFormat.image_channel_order))
        {
            if( !is_extension_available( device, "cl_khr_srgb_image_writes" ))
            {
                log_missing_feature( "-----------------------------------------------------\n" );
                log_missing_feature( "WARNING!!! sRGB formats are shown in the supported write-format list.\n");
                log_missing_feature( "However the extension cl_khr_srgb_image_writes is not available.\n");
                log_missing_feature( "Please make sure the extension is officially supported by the device .\n");
                log_missing_feature( "-----------------------------------------------------\n\n" );
                continue;
            }
        }

        gTestCount++;

        print_write_header( &imageFormat, false );

        // Initialised so an unexpected image type can never leave it indeterminate
        int retCode = 0;
        switch (imageType)
        {
            case CL_MEM_OBJECT_IMAGE1D:
                retCode = test_write_image_1D_set( device, &imageFormat, inputType, seed );
                break;
            case CL_MEM_OBJECT_IMAGE2D:
                retCode = test_write_image_set( device, &imageFormat, inputType, seed );
                break;
            case CL_MEM_OBJECT_IMAGE3D:
                retCode = test_write_image_3D_set( device, &imageFormat, inputType, seed );
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                retCode = test_write_image_1D_array_set( device, &imageFormat, inputType, seed );
                break;
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                retCode = test_write_image_2D_array_set( device, &imageFormat, inputType, seed );
                break;
            default:
                // No write test exists for this image type; report it instead of reading garbage
                log_error( "ERROR: Unsupported image type 0x%x for write_image test\n", (unsigned int)imageType );
                retCode = -1;
                break;
        }

        if( retCode != 0 )
        {
            gTestFailure++;
            log_error( "FAILED: " );
            print_write_header( &imageFormat, true );
            log_info( "\n" );
        }
        ret += retCode;
    }
    return ret;
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user